diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..d6cbb3dbe --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,27 @@ +## Summary + + + +## Changes + + + +- + +## Testing + + + +- [ ] Ran tests for modified crates +- [ ] `cargo clippy` and `cargo fmt` pass + +## Security Considerations + + + +## Breaking Changes + + + +None diff --git a/.github/workflows/bench-crates.yml b/.github/workflows/bench-crates.yml new file mode 100644 index 000000000..938cb8e81 --- /dev/null +++ b/.github/workflows/bench-crates.yml @@ -0,0 +1,99 @@ +name: Benchmark modular crates + +on: + push: + branches: [main] + paths: + - "crates/**" + pull_request: + branches: [main] + paths: + - "crates/**" + +env: + CARGO_TERM_COLOR: always + +concurrency: + group: bench-crates-${{ github.head_ref || github.ref }} + cancel-in-progress: true + +jobs: + bench: + runs-on: ubuntu-latest + name: Criterion benchmarks + steps: + - uses: actions/checkout@v6 + - uses: actions-rust-lang/setup-rust-toolchain@v1 + + - name: Install critcmp + run: cargo install critcmp --locked + + # On main push: save baseline + - name: Run benchmarks (baseline) + if: github.event_name == 'push' + run: > + cargo bench + -p jolt-field + -p jolt-poly + -p jolt-transcript + -p jolt-sumcheck + -p jolt-openings + -- --save-baseline main_run + + - name: Upload baseline + if: github.event_name == 'push' + uses: actions/upload-artifact@v4 + with: + name: criterion-baseline + path: target/criterion/ + retention-days: 30 + + # On PR: compare against baseline + - name: Download baseline + if: github.event_name == 'pull_request' + uses: dawidd6/action-download-artifact@v6 + with: + name: criterion-baseline + path: target/criterion/ + branch: main + workflow: bench-crates.yml + if_no_artifact_found: warn + + - name: Run benchmarks (PR) + if: github.event_name == 'pull_request' + run: > + cargo bench + -p jolt-field + -p jolt-poly + -p jolt-transcript + -p 
jolt-sumcheck + -p jolt-openings + -- --save-baseline pr_run + + - name: Compare benchmarks + if: github.event_name == 'pull_request' + id: critcmp + run: | + if [ -d "target/criterion/main_run" ]; then + OUTPUT=$(critcmp main_run pr_run --threshold 5 2>&1) || true + echo "comparison<> "$GITHUB_OUTPUT" + echo "$OUTPUT" >> "$GITHUB_OUTPUT" + echo "EOF" >> "$GITHUB_OUTPUT" + else + echo "comparison=No baseline found — skipping comparison." >> "$GITHUB_OUTPUT" + fi + + - name: Post benchmark comparison + if: github.event_name == 'pull_request' && steps.critcmp.outputs.comparison != '' + uses: actions/github-script@v7 + with: + script: | + const body = `## Benchmark comparison (crates)\n\n\`\`\`\n${process.env.COMPARISON}\n\`\`\``; + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body, + }); + env: + COMPARISON: ${{ steps.critcmp.outputs.comparison }} diff --git a/Cargo.toml b/Cargo.toml index 0db26edb0..a5a28cfd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,11 @@ keywords = ["SNARK", "cryptography", "proofs"] [workspace] members = [ + "crates/jolt-poly", + "crates/jolt-instructions", + "crates/jolt-transcript", + "crates/jolt-profiling", + "crates/jolt-field", "jolt-core", "tracer", "common", @@ -149,6 +154,58 @@ allocative = { git = "https://github.com/facebookexperimental/allocative", rev = [workspace.metadata.cargo-machete] ignored = ["jolt-sdk"] + +[workspace.lints.clippy] +pedantic = { level = "warn", priority = -1 } + +# Pedantic overrides — suppressed because they're too noisy for math-heavy ZK code +missing_errors_doc = "allow" +missing_panics_doc = "allow" +must_use_candidate = "allow" +doc_markdown = "allow" +similar_names = "allow" +too_many_lines = "allow" +module_name_repetitions = "allow" +struct_excessive_bools = "allow" +fn_params_excessive_bools = "allow" +items_after_statements = "allow" +uninlined_format_args = "allow" +return_self_not_must_use = "allow" 
+default_trait_access = "allow" +match_same_arms = "allow" +manual_let_else = "allow" +used_underscore_binding = "allow" +no_effect_underscore_binding = "allow" +needless_pass_by_value = "allow" +trivially_copy_pass_by_ref = "allow" +redundant_closure_for_method_calls = "allow" +unnecessary_wraps = "allow" +if_not_else = "allow" + +# Numeric/math code — ZK cryptography uses these patterns pervasively +float_cmp = "allow" +many_single_char_names = "allow" +wildcard_imports = "allow" +inline_always = "allow" +checked_conversions = "allow" + +# Cast lints — field arithmetic requires intentional casting +cast_possible_truncation = "allow" +cast_sign_loss = "allow" +cast_precision_loss = "allow" +cast_possible_wrap = "allow" +cast_lossless = "allow" + +# Code quality — hard denies to catch AI-generated slop +dbg_macro = "deny" +todo = "deny" +unimplemented = "deny" +print_stdout = "deny" +print_stderr = "deny" +undocumented_unsafe_blocks = "deny" + +[workspace.lints.rust] +unused_results = "warn" [workspace.dependencies] # Cryptography and Math ark-bn254 = { git = "https://github.com/a16z/arkworks-algebra", branch = "dev/twist-shout", default-features = false } @@ -165,6 +222,8 @@ sha2 = "0.10" sha3 = "0.10.8" blake2 = "0.10" blake3 = { version = "1.5.0" } +light-poseidon = "0.4" +digest = "0.10" jolt-optimizations = { git = "https://github.com/a16z/arkworks-algebra", branch = "dev/twist-shout" } dory = { package = "dory-pcs", version = "0.3.0", features = ["backends", "cache", "disk-persistence"] } @@ -214,7 +273,7 @@ memory-stats = { version = "1.0.0", features = ["always_use_statm"] } rayon = { version = "^1.8.0" } # Tracing and Profiling -tracing = { version = "0.1.37", default-features = false } +tracing = { version = "0.1.37", default-features = false, features = ["attributes"] } tracing-chrome = "0.7.1" tracing-subscriber = { version = "0.3.20", features = ["fmt", "env-filter"] } inferno = { version = "0.12.3" } diff --git a/crates/jolt-field/Cargo.toml 
b/crates/jolt-field/Cargo.toml new file mode 100644 index 000000000..e9ed4405b --- /dev/null +++ b/crates/jolt-field/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "jolt-field" +version = "0.1.0" +authors = ["Jolt Contributors"] +edition = "2021" +license = "MIT OR Apache-2.0" +description = "Field abstractions for the Jolt zkVM" +repository = "https://github.com/a16z/jolt" +keywords = ["SNARK", "cryptography", "finite-fields", "BN254"] +categories = ["cryptography"] + +[lints] +workspace = true + +[dependencies] +ark-ff = { workspace = true } +ark-serialize = { workspace = true } +ark-bn254 = { workspace = true, features = ["scalar_field"], optional = true } +num-traits = { workspace = true } +serde = { workspace = true, features = ["derive"] } +allocative = { workspace = true, optional = true } +dory = { workspace = true, optional = true } +rand = { workspace = true } +rand_core = { workspace = true } + +[features] +default = ["bn254"] +bn254 = ["dep:ark-bn254"] +dory-pcs = ["dep:dory", "bn254"] + +[dev-dependencies] +ark-std = { workspace = true } +rand_chacha = { workspace = true } +criterion = { workspace = true } + +[[bench]] +name = "field_arith" +harness = false diff --git a/crates/jolt-field/README.md b/crates/jolt-field/README.md new file mode 100644 index 000000000..7ca8538e4 --- /dev/null +++ b/crates/jolt-field/README.md @@ -0,0 +1,50 @@ +# jolt-field + +Field abstractions for the Jolt zkVM. + +Part of the [Jolt](https://github.com/a16z/jolt) zkVM. + +## Overview + +This crate defines the core `Field` trait and associated types used throughout the Jolt proving system. It provides a backend-agnostic interface over prime-order scalar fields, currently implemented for the BN254 scalar field (`Fr`). + +The crate also provides optimized arithmetic primitives -- wide accumulators and fused multiply-add -- all tuned for the BN254 field. + +## Public API + +### Core Traits + +- **`Field`** -- Prime field element abstraction. 
Elements are `Copy`, thread-safe, serializable. Provides conversions from integer types, random sampling, square, inverse, and bit-width queries. +- **`OptimizedMul`** -- Multiplication with fast-path short-circuits for zero and one. +- **`MontgomeryConstants`** -- Trait providing Montgomery form constants (modulus limbs, R^2, inverse) for wide arithmetic backends. + +### Accumulation + +- **`FieldAccumulator`** -- Trait for accumulators that defer modular reduction across multiply-add steps. +- **`NaiveAccumulator`** -- Simple accumulator using standard field arithmetic (no deferred reduction). +- **`WideAccumulator`** -- BN254-specific accumulator using unreduced wide limbs for deferred reduction. +- **`Limbs`** -- Fixed-size limb array type used by wide arithmetic internals. + +### Types + +- **`Fr`** -- BN254 scalar field element (`#[repr(transparent)]` newtype over `ark_bn254::Fr`). + +### Signed Integer Types + +The `signed` module provides fixed-width signed big integers (`S64`, `S128`, `S192`, `S256`, etc.) with truncating arithmetic. + +## Dependency Position + +`jolt-field` is a **leaf crate** with no internal Jolt dependencies. All other `jolt-*` crates depend on it. 
+ +## Feature Flags + +| Flag | Default | Description | +|------|---------|-------------| +| `bn254` | **Yes** | Enable BN254 scalar field implementation | +| `dory-pcs` | No | Enable Dory PCS interop (implies `bn254`) | +| `allocative` | No | Enable memory profiling via the `allocative` crate | + +## License + +MIT OR Apache-2.0 diff --git a/crates/jolt-field/benches/field_arith.rs b/crates/jolt-field/benches/field_arith.rs new file mode 100644 index 000000000..181043658 --- /dev/null +++ b/crates/jolt-field/benches/field_arith.rs @@ -0,0 +1,59 @@ +#![allow(unused_results)] + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use jolt_field::{Field, Fr}; +use rand_chacha::ChaCha20Rng; +use rand_core::SeedableRng; + +fn bench_field_mul(c: &mut Criterion) { + let mut rng = ChaCha20Rng::seed_from_u64(0); + let a: Fr = Field::random(&mut rng); + let b: Fr = Field::random(&mut rng); + + c.bench_function("Fr * Fr", |bench| { + bench.iter(|| black_box(a) * black_box(b)); + }); +} + +fn bench_mul_u64(c: &mut Criterion) { + let mut rng = ChaCha20Rng::seed_from_u64(1); + let a: Fr = Field::random(&mut rng); + let n = 0xDEAD_BEEF_CAFE_BABEu64; + + c.bench_function("Fr::mul_u64", |bench| { + bench.iter(|| ::mul_u64(black_box(&a), black_box(n))); + }); +} + +fn bench_mul_u128(c: &mut Criterion) { + let mut rng = ChaCha20Rng::seed_from_u64(2); + let a: Fr = Field::random(&mut rng); + let n = 0xDEAD_BEEF_CAFE_BABE_1234_5678_9ABC_DEF0u128; + + c.bench_function("Fr::mul_u128", |bench| { + bench.iter(|| ::mul_u128(black_box(&a), black_box(n))); + }); +} + +fn bench_to_from_bytes(c: &mut Criterion) { + let mut rng = ChaCha20Rng::seed_from_u64(4); + let a: Fr = Field::random(&mut rng); + let bytes = a.to_bytes(); + + c.bench_function("Fr::to_bytes", |bench| { + bench.iter(|| black_box(a).to_bytes()); + }); + + c.bench_function("Fr::from_bytes", |bench| { + bench.iter(|| ::from_bytes(black_box(&bytes))); + }); +} + +criterion_group!( + benches, + 
bench_field_mul, + bench_mul_u64, + bench_mul_u128, + bench_to_from_bytes, +); +criterion_main!(benches); diff --git a/crates/jolt-field/fuzz/Cargo.toml b/crates/jolt-field/fuzz/Cargo.toml new file mode 100644 index 000000000..180097ec9 --- /dev/null +++ b/crates/jolt-field/fuzz/Cargo.toml @@ -0,0 +1,35 @@ +[workspace] + +[package] +name = "jolt-field-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +jolt-field = { path = ".." } +num-traits = "0.2" + +[[bin]] +name = "from_bytes" +path = "fuzz_targets/from_bytes.rs" +doc = false + +[[bin]] +name = "field_arith" +path = "fuzz_targets/field_arith.rs" +doc = false + +[[bin]] +name = "wide_accumulator_fmadd" +path = "fuzz_targets/wide_accumulator_fmadd.rs" +doc = false + +[[bin]] +name = "wide_accumulator_merge" +path = "fuzz_targets/wide_accumulator_merge.rs" +doc = false diff --git a/crates/jolt-field/fuzz/fuzz_targets/field_arith.rs b/crates/jolt-field/fuzz/fuzz_targets/field_arith.rs new file mode 100644 index 000000000..e8dd4901e --- /dev/null +++ b/crates/jolt-field/fuzz/fuzz_targets/field_arith.rs @@ -0,0 +1,34 @@ +#![no_main] +use jolt_field::{Field, Fr}; +use libfuzzer_sys::fuzz_target; +use num_traits::Zero; + +fuzz_target!(|data: &[u8]| { + if data.len() < 64 { + return; + } + let a = ::from_bytes(&data[..32]); + let b = ::from_bytes(&data[32..64]); + + // Arithmetic operations must not panic + let sum = a + b; + let diff = a - b; + let prod = a * b; + let sq = a * a; + + // (a + b) - b == a + assert_eq!(sum - b, a); + // (a - b) + b == a + assert_eq!(diff + b, a); + // a * 0 == 0 + assert!((a * Fr::zero()).is_zero()); + + // inverse must not panic + if !a.is_zero() { + let inv = a.inverse().expect("nonzero element must have inverse"); + assert_eq!(a * inv, Fr::from_u64(1)); + } + + // Prevent optimizing away + let _ = (prod, sq); +}); diff --git a/crates/jolt-field/fuzz/fuzz_targets/from_bytes.rs 
b/crates/jolt-field/fuzz/fuzz_targets/from_bytes.rs new file mode 100644 index 000000000..6cafc11ef --- /dev/null +++ b/crates/jolt-field/fuzz/fuzz_targets/from_bytes.rs @@ -0,0 +1,14 @@ +#![no_main] +use jolt_field::{Field, Fr}; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + // from_bytes should never panic on arbitrary input + let a = ::from_bytes(data); + + // Round-trip: from_bytes → to_bytes → from_bytes must be stable + let bytes = a.to_bytes(); + let b = ::from_bytes(&bytes); + let bytes2 = b.to_bytes(); + assert_eq!(bytes, bytes2, "from_bytes round-trip is not stable"); +}); diff --git a/crates/jolt-field/fuzz/fuzz_targets/wide_accumulator_fmadd.rs b/crates/jolt-field/fuzz/fuzz_targets/wide_accumulator_fmadd.rs new file mode 100644 index 000000000..58ca65edc --- /dev/null +++ b/crates/jolt-field/fuzz/fuzz_targets/wide_accumulator_fmadd.rs @@ -0,0 +1,31 @@ +#![no_main] +use jolt_field::{Field, FieldAccumulator, Fr, WideAccumulator}; +use libfuzzer_sys::fuzz_target; +use num_traits::Zero; + +fuzz_target!(|data: &[u8]| { + // Each pair of field elements needs 64 bytes (2 x 32-byte chunks). + // Silently skip inputs that don't contain at least one complete pair. 
+ if data.len() < 64 { + return; + } + + let mut acc = WideAccumulator::default(); + let mut naive_sum = Fr::zero(); + + let pairs = data.len() / 64; + for i in 0..pairs { + let offset = i * 64; + let a = ::from_bytes(&data[offset..offset + 32]); + let b = ::from_bytes(&data[offset + 32..offset + 64]); + + acc.fmadd(a, b); + naive_sum += a * b; + } + + assert_eq!( + acc.reduce(), + naive_sum, + "WideAccumulator diverged from naive field arithmetic after {pairs} fmadd calls" + ); +}); diff --git a/crates/jolt-field/fuzz/fuzz_targets/wide_accumulator_merge.rs b/crates/jolt-field/fuzz/fuzz_targets/wide_accumulator_merge.rs new file mode 100644 index 000000000..e43b3a123 --- /dev/null +++ b/crates/jolt-field/fuzz/fuzz_targets/wide_accumulator_merge.rs @@ -0,0 +1,38 @@ +#![no_main] +use jolt_field::{Field, FieldAccumulator, Fr, WideAccumulator}; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + // Need at least two pairs (128 bytes) so each half gets at least one. + if data.len() < 128 { + return; + } + + let pairs = data.len() / 64; + let split = pairs / 2; + + let mut acc1 = WideAccumulator::default(); + let mut acc2 = WideAccumulator::default(); + let mut acc_all = WideAccumulator::default(); + + for i in 0..pairs { + let offset = i * 64; + let a = ::from_bytes(&data[offset..offset + 32]); + let b = ::from_bytes(&data[offset + 32..offset + 64]); + + if i < split { + acc1.fmadd(a, b); + } else { + acc2.fmadd(a, b); + } + acc_all.fmadd(a, b); + } + + acc1.merge(acc2); + + assert_eq!( + acc1.reduce(), + acc_all.reduce(), + "merge+reduce diverged from single-accumulator reduce ({pairs} pairs, split at {split})" + ); +}); diff --git a/crates/jolt-field/fuzz/rust-toolchain.toml b/crates/jolt-field/fuzz/rust-toolchain.toml new file mode 100644 index 000000000..5d56faf9a --- /dev/null +++ b/crates/jolt-field/fuzz/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly" diff --git a/crates/jolt-field/src/accumulator.rs 
b/crates/jolt-field/src/accumulator.rs new file mode 100644 index 000000000..56e5e320a --- /dev/null +++ b/crates/jolt-field/src/accumulator.rs @@ -0,0 +1,107 @@ +//! Deferred-reduction accumulator for fused multiply-add. +//! +//! In sumcheck inner loops, many products are summed before the final result +//! is needed. [`FieldAccumulator`] lets implementations defer modular reduction +//! by accumulating in wider integer types, reducing once at the end. This +//! amortizes the expensive reduction across hundreds of multiply-add steps. +//! +//! - [`NaiveAccumulator`] — fallback using standard field arithmetic. +//! - `WideAccumulator` (BN254, in `arkworks/`) — 9-limb wide integer accumulator +//! that defers Montgomery reduction. + +use crate::Field; +use num_traits::One; + +/// Accumulates products with potentially deferred modular reduction. +/// +/// The hot loop pattern `acc += a * b` repeated hundreds of times per output +/// slot dominates the CPU prover. Standard field arithmetic reduces mod p +/// after every multiply and every add. Implementations for specific fields +/// (e.g., BN254 Fr) can instead accumulate unreduced wide products and +/// reduce once at the end via [`reduce`](Self::reduce). +/// +/// # Invariants +/// +/// - [`fmadd`](Self::fmadd) must be equivalent to `result += a * b` in the field. +/// - [`merge`](Self::merge) must be equivalent to adding another accumulator's +/// partial result (used for parallel reduction). +/// - [`reduce`](Self::reduce) must return the field element equal to the +/// accumulated sum of products. +pub trait FieldAccumulator: Default + Copy + Send + Sync { + /// The field type this accumulator operates over. + type Field: crate::Field; + + /// Fused multiply-add: `self += a * b` without intermediate reduction. + fn fmadd(&mut self, a: Self::Field, b: Self::Field); + + /// Fused multiply-add with a `u8` scalar: `self += a * F::from(b)`. 
+ /// + /// Implementations may override for optimized small-scalar multiplication + /// (e.g., 4×1 limb schoolbook instead of 4×4). + #[inline] + fn fmadd_u8(&mut self, a: Self::Field, b: u8) { + self.fmadd(a, Self::Field::from_u8(b)); + } + + /// Fused multiply-add with a `u64` scalar: `self += a * F::from(b)`. + #[inline] + fn fmadd_u64(&mut self, a: Self::Field, b: u64) { + self.fmadd(a, Self::Field::from_u64(b)); + } + + /// Fused multiply-add with an `i64` scalar: `self += a * F::from(b)`. + #[inline] + fn fmadd_i64(&mut self, a: Self::Field, b: i64) { + self.fmadd(a, Self::Field::from_i64(b)); + } + + /// Fused multiply-add with a `bool` scalar: `self += a` when `b` is true. + #[inline] + fn fmadd_bool(&mut self, a: Self::Field, b: bool) { + if b { + self.fmadd(a, ::one()); + } + } + + /// Merge another accumulator's partial sum into this one. + /// + /// Used in parallel reduction (e.g., Rayon fold+reduce) where each thread + /// accumulates independently, then results are combined. + fn merge(&mut self, other: Self); + + /// Finalize: reduce the accumulated value to a field element. + fn reduce(self) -> Self::Field; +} + +/// Naive accumulator using standard field arithmetic. +/// +/// Every [`fmadd`](FieldAccumulator::fmadd) performs a full modular multiply +/// and add. Used as a fallback for fields without wide-integer optimization. 
+#[derive(Clone, Copy)] +pub struct NaiveAccumulator(F); + +impl Default for NaiveAccumulator { + #[inline] + fn default() -> Self { + Self(F::zero()) + } +} + +impl FieldAccumulator for NaiveAccumulator { + type Field = F; + + #[inline] + fn fmadd(&mut self, a: F, b: F) { + self.0 += a * b; + } + + #[inline] + fn merge(&mut self, other: Self) { + self.0 += other.0; + } + + #[inline] + fn reduce(self) -> F { + self.0 + } +} diff --git a/crates/jolt-field/src/arkworks/bn254.rs b/crates/jolt-field/src/arkworks/bn254.rs new file mode 100644 index 000000000..32e0d6d96 --- /dev/null +++ b/crates/jolt-field/src/arkworks/bn254.rs @@ -0,0 +1,466 @@ +//! Newtype wrapper around `ark_bn254::Fr` that decouples the public API from arkworks. +//! +//! [`Fr`] is `#[repr(transparent)]` over the inner arkworks scalar field element, +//! so it has identical layout and can be transmuted where needed. +use crate::{Field, Limbs}; +use ark_ff::{prelude::*, PrimeField, UniformRand}; +use rand_core::RngCore; + +use super::bn254_ops; + +type InnerFr = ark_bn254::Fr; + +/// BN254 scalar field element. +/// +/// A `#[repr(transparent)]` newtype over `ark_bn254::Fr`. 
+#[derive(Clone, Copy, Default, PartialEq, Eq, Hash)] +#[repr(transparent)] +pub struct Fr(pub(crate) InnerFr); + +impl From for Fr { + #[inline(always)] + fn from(inner: ark_bn254::Fr) -> Self { + Fr(inner) + } +} + +impl From for ark_bn254::Fr { + #[inline(always)] + fn from(wrapper: Fr) -> Self { + wrapper.0 + } +} + +impl From for Fr { + #[inline(always)] + fn from(v: bool) -> Self { + ::from_bool(v) + } +} + +impl From for Fr { + #[inline(always)] + fn from(v: u8) -> Self { + ::from_u64(v as u64) + } +} + +impl From for Fr { + #[inline(always)] + fn from(v: u16) -> Self { + ::from_u64(v as u64) + } +} + +impl From for Fr { + #[inline(always)] + fn from(v: u32) -> Self { + ::from_u64(v as u64) + } +} + +impl From for Fr { + #[inline(always)] + fn from(v: u64) -> Self { + ::from_u64(v) + } +} + +impl From for Fr { + #[inline(always)] + fn from(v: i64) -> Self { + ::from_i64(v) + } +} + +impl From for Fr { + #[inline(always)] + fn from(v: i128) -> Self { + ::from_i128(v) + } +} + +impl From for Fr { + #[inline(always)] + fn from(v: u128) -> Self { + ::from_u128(v) + } +} + +impl std::fmt::Debug for Fr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Debug::fmt(&self.0, f) + } +} + +impl std::fmt::Display for Fr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.0, f) + } +} + +macro_rules! 
delegate_binop { + ($Trait:ident, $method:ident) => { + impl std::ops::$Trait for Fr { + type Output = Fr; + #[inline(always)] + fn $method(self, rhs: Fr) -> Fr { + Fr(std::ops::$Trait::$method(self.0, rhs.0)) + } + } + + impl std::ops::$Trait<&Fr> for Fr { + type Output = Fr; + #[inline(always)] + fn $method(self, rhs: &Fr) -> Fr { + Fr(std::ops::$Trait::$method(self.0, &rhs.0)) + } + } + + impl std::ops::$Trait for &Fr { + type Output = Fr; + #[inline(always)] + fn $method(self, rhs: Fr) -> Fr { + Fr(std::ops::$Trait::$method(self.0, rhs.0)) + } + } + + impl<'a, 'b> std::ops::$Trait<&'b Fr> for &'a Fr { + type Output = Fr; + #[inline(always)] + fn $method(self, rhs: &'b Fr) -> Fr { + Fr(std::ops::$Trait::$method(self.0, &rhs.0)) + } + } + }; +} + +delegate_binop!(Add, add); +delegate_binop!(Sub, sub); +delegate_binop!(Mul, mul); +delegate_binop!(Div, div); + +impl std::ops::Neg for Fr { + type Output = Fr; + #[inline(always)] + fn neg(self) -> Fr { + Fr(self.0.neg()) + } +} + +impl std::ops::AddAssign for Fr { + #[inline(always)] + fn add_assign(&mut self, rhs: Fr) { + self.0.add_assign(rhs.0); + } +} + +impl std::ops::SubAssign for Fr { + #[inline(always)] + fn sub_assign(&mut self, rhs: Fr) { + self.0.sub_assign(rhs.0); + } +} + +impl std::ops::MulAssign for Fr { + #[inline(always)] + fn mul_assign(&mut self, rhs: Fr) { + self.0.mul_assign(rhs.0); + } +} + +impl std::iter::Sum for Fr { + fn sum>(iter: I) -> Self { + Fr(iter.map(|f| f.0).sum()) + } +} + +impl<'a> std::iter::Sum<&'a Fr> for Fr { + fn sum>(iter: I) -> Self { + Fr(iter.map(|f| f.0).sum()) + } +} + +impl std::iter::Product for Fr { + fn product>(iter: I) -> Self { + Fr(iter.map(|f| f.0).product()) + } +} + +impl<'a> std::iter::Product<&'a Fr> for Fr { + fn product>(iter: I) -> Self { + Fr(iter.map(|f| f.0).product()) + } +} + +impl num_traits::Zero for Fr { + #[inline(always)] + fn zero() -> Self { + Fr(InnerFr::zero()) + } + + #[inline(always)] + fn is_zero(&self) -> bool { + self.0.is_zero() + } 
+} + +impl num_traits::One for Fr { + #[inline(always)] + fn one() -> Self { + Fr(InnerFr::one()) + } + + #[inline(always)] + fn is_one(&self) -> bool { + self.0.is_one() + } +} + +impl serde::Serialize for Fr { + fn serialize(&self, serializer: S) -> Result { + use ark_serialize::CanonicalSerialize; + let mut buf = [0u8; 32]; + self.0 + .serialize_compressed(&mut buf[..]) + .map_err(serde::ser::Error::custom)?; + <[u8; 32]>::serialize(&buf, serializer) + } +} + +impl<'de> serde::Deserialize<'de> for Fr { + fn deserialize>(deserializer: D) -> Result { + use ark_serialize::CanonicalDeserialize; + let buf = <[u8; 32]>::deserialize(deserializer)?; + let inner = InnerFr::deserialize_compressed(&buf[..]).map_err(serde::de::Error::custom)?; + Ok(Fr(inner)) + } +} + +impl ark_serialize::CanonicalSerialize for Fr { + fn serialize_with_mode( + &self, + writer: W, + compress: ark_serialize::Compress, + ) -> Result<(), ark_serialize::SerializationError> { + self.0.serialize_with_mode(writer, compress) + } + + fn serialized_size(&self, compress: ark_serialize::Compress) -> usize { + self.0.serialized_size(compress) + } +} + +impl ark_serialize::Valid for Fr { + fn check(&self) -> Result<(), ark_serialize::SerializationError> { + self.0.check() + } +} + +impl ark_serialize::CanonicalDeserialize for Fr { + fn deserialize_with_mode( + reader: R, + compress: ark_serialize::Compress, + validate: ark_serialize::Validate, + ) -> Result { + InnerFr::deserialize_with_mode(reader, compress, validate).map(Fr) + } +} + +impl UniformRand for Fr { + fn rand(rng: &mut R) -> Self { + Fr(::rand(rng)) + } +} + +#[cfg(feature = "allocative")] +impl allocative::Allocative for Fr { + fn visit<'a, 'b: 'a>(&self, visitor: &'a mut allocative::Visitor<'b>) { + visitor.visit_simple_sized::(); + } +} + +impl Fr { + /// Deserializes from little-endian bytes, reducing modulo the field prime. 
+ #[inline] + pub fn from_le_bytes_mod_order(bytes: &[u8]) -> Self { + Fr(InnerFr::from_le_bytes_mod_order(bytes)) + } + + /// Converts a limb array to a field element without checking that it is + /// less than the modulus. + #[inline] + pub fn from_bigint_unchecked(limbs: Limbs<4>) -> Option { + Some(Fr(bn254_ops::from_bigint_unchecked(limbs.to_bigint()))) + } + + /// Access the internal Montgomery-form limbs. + /// + /// Used by [`WideAccumulator`](super::wide_accumulator::WideAccumulator) + /// for deferred-reduction fused multiply-add. + #[inline(always)] + pub fn inner_limbs(self) -> Limbs<4> { + Limbs((self.0).0 .0) + } + + /// Construct from the inner arkworks element. + #[inline(always)] + pub(crate) fn from_inner(inner: InnerFr) -> Self { + Fr(inner) + } +} + +impl Field for Fr { + type Accumulator = super::wide_accumulator::WideAccumulator; + + const NUM_BYTES: usize = 32; + + fn to_bytes(&self) -> [u8; 32] { + use ark_serialize::CanonicalSerialize; + let mut buf = [0u8; 32]; + self.0 + .serialize_compressed(&mut buf[..]) + .expect("field serialization should not fail"); + buf + } + + fn random(rng: &mut R) -> Self { + Fr(::rand(rng)) + } + + fn from_bytes(bytes: &[u8]) -> Self { + Fr::from_le_bytes_mod_order(bytes) + } + + fn to_u64(&self) -> Option { + let bigint = ::into_bigint(self.0); + let limbs: &[u64] = bigint.as_ref(); + let result = limbs[0]; + + if ::from_u64(result) != *self { + None + } else { + Some(result) + } + } + + fn num_bits(&self) -> u32 { + ::into_bigint(self.0).num_bits() + } + + fn square(&self) -> Self { + Fr(::square(&self.0)) + } + + fn inverse(&self) -> Option { + ::inverse(&self.0).map(Fr) + } + + #[inline] + fn from_u64(n: u64) -> Self { + Fr(bn254_ops::from_u64(n)) + } + + #[inline] + fn from_i64(val: i64) -> Self { + if val.is_negative() { + -Fr(bn254_ops::from_u64(val.unsigned_abs())) + } else { + Fr(bn254_ops::from_u64(val as u64)) + } + } + + #[inline] + fn from_i128(val: i128) -> Self { + if val.is_negative() { + 
-Fr(bn254_ops::from_u128(val.unsigned_abs())) + } else { + Fr(bn254_ops::from_u128(val as u128)) + } + } + + #[inline] + fn from_u128(val: u128) -> Self { + Fr(bn254_ops::from_u128(val)) + } + + #[inline] + fn mul_u64(&self, n: u64) -> Self { + Fr(bn254_ops::mul_u64(self.0, n)) + } + + #[inline(always)] + fn mul_i64(&self, n: i64) -> Self { + Fr(bn254_ops::mul_i64(self.0, n)) + } + + #[inline(always)] + fn mul_u128(&self, n: u128) -> Self { + Fr(bn254_ops::mul_u128(self.0, n)) + } + + #[inline] + fn mul_i128(&self, n: i128) -> Self { + Fr(bn254_ops::mul_i128(self.0, n)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::Field; + + #[test] + fn field_arithmetic_basic() { + let a = Fr::from_u64(7); + let b = Fr::from_u64(6); + assert_eq!(a * b, Fr::from_u64(42)); + assert_eq!(a + b, Fr::from_u64(13)); + assert_eq!(b - a, Fr::from_i64(-1)); + } + + #[test] + fn from_signed() { + let neg_one = Fr::from_i64(-1); + let one = Fr::one(); + assert_eq!(neg_one + one, Fr::zero()); + + let neg_big = Fr::from_i128(-1_000_000_000_000i128); + let pos_big = Fr::from_u128(1_000_000_000_000u128); + assert_eq!(neg_big + pos_big, Fr::zero()); + } + + #[test] + fn serialization_roundtrip() { + let val = Fr::from_u64(123_456_789); + let bytes = val.to_bytes(); + let recovered = Fr::from_bytes(&bytes); + assert_eq!(val, recovered); + } + + #[test] + fn inverse_and_square() { + let a = Fr::from_u64(42); + let inv = a.inverse().unwrap(); + assert_eq!(a * inv, Fr::one()); + assert!(Fr::zero().inverse().is_none()); + + assert_eq!(a.square(), a * a); + } + + #[test] + fn to_u64_roundtrip() { + assert_eq!(Fr::from_u64(999).to_u64(), Some(999)); + // Large field element should not fit in u64 + let big = Fr::from_u128(u128::MAX / 2); + assert_eq!(big.to_u64(), None); + } + + #[test] + fn inner_limbs_roundtrip() { + let val = Fr::from_u64(42); + let limbs = val.inner_limbs(); + let recovered = Fr::from_bigint_unchecked(limbs).unwrap(); + assert_eq!(val, recovered); + } +} diff --git 
a/crates/jolt-field/src/arkworks/bn254_ops.rs b/crates/jolt-field/src/arkworks/bn254_ops.rs new file mode 100644 index 000000000..e084bf517 --- /dev/null +++ b/crates/jolt-field/src/arkworks/bn254_ops.rs @@ -0,0 +1,773 @@ +//! BN254 Fr field arithmetic operations. +//! +//! Low-level field arithmetic (Montgomery/Barrett reduction, scalar multiplication, +//! precomputed lookup tables). +use ark_bn254::FrConfig; +use ark_ff::{BigInt, Fp, MontConfig}; +use num_traits::Zero; + +type Fr = ark_bn254::Fr; + +/// a + b * c + carry → (result, new carry) +#[inline(always)] +fn mac_with_carry(a: u64, b: u64, c: u64, carry: &mut u64) -> u64 { + let tmp = (a as u128) + (b as u128) * (c as u128) + (*carry as u128); + *carry = (tmp >> 64) as u64; + tmp as u64 +} + +/// a + b * c → (result, carry) (no input carry) +#[inline(always)] +fn mac_no_carry(a: u64, b: u64, c: u64, carry: &mut u64) -> u64 { + let tmp = (a as u128) + (b as u128) * (c as u128); + *carry = (tmp >> 64) as u64; + tmp as u64 +} + +/// *a += b + carry → new carry +#[inline(always)] +fn adc(a: &mut u64, b: u64, carry: u64) -> u64 { + let tmp = (*a as u128) + (b as u128) + (carry as u128); + *a = tmp as u64; + (tmp >> 64) as u64 +} + +/// *a -= b + borrow → new borrow (1 if underflow) +#[inline(always)] +fn sbb(a: &mut u64, b: u64, borrow: u64) -> u64 { + let tmp = (1u128 << 64) + (*a as u128) - (b as u128) - (borrow as u128); + *a = tmp as u64; + u64::from(tmp >> 64 == 0) +} + +const N: usize = 4; + +const MODULUS: [u64; N] = >::MODULUS.0; +const INV: u64 = >::INV; +const R: BigInt = >::R; +#[allow(dead_code)] +const R2: BigInt = >::R2; + +const MODULUS_HAS_SPARE_BIT: bool = MODULUS[N - 1] >> 63 == 0; +const MODULUS_NUM_SPARE_BITS: u32 = MODULUS[N - 1].leading_zeros(); + +/// 2*p as ([u64; 4], u64) — low N limbs and carry +const MODULUS_TIMES_2: ([u64; N], u64) = { + let mut lo = [0u64; N]; + let mut carry = 0u64; + let mut i = 0; + while i < N { + let doubled = (MODULUS[i] as u128) * 2 + carry as u128; + lo[i] = 
doubled as u64; + carry = (doubled >> 64) as u64; + i += 1; + } + (lo, carry) +}; + +/// 3*p as ([u64; 4], u64) — low N limbs and carry +const MODULUS_TIMES_3: ([u64; N], u64) = { + let (m2_lo, m2_hi) = MODULUS_TIMES_2; + let mut lo = [0u64; N]; + let mut carry = 0u64; + let mut i = 0; + while i < N { + let sum = (MODULUS[i] as u128) + (m2_lo[i] as u128) + (carry as u128); + lo[i] = sum as u64; + carry = (sum >> 64) as u64; + i += 1; + } + (lo, m2_hi + carry) +}; + +/// Barrett mu = floor(2^(N*64 + 64 - spare_bits - 1) / MODULUS) +/// +/// Computed via normalized Knuth long division. The quotient fits in a single u64. +const BARRETT_MU: u64 = { + // Dividend = 2^(319 - spare_bits). For BN254 (spare_bits=2): 2^317 + // Represented as 5 limbs: [0, 0, 0, 0, 1 << (63 - spare_bits)] + let shift = MODULUS_NUM_SPARE_BITS; + + // Normalize divisor: shift left by `shift` so MSB of top limb is set + let pn3 = if shift > 0 { + (MODULUS[3] << shift) | (MODULUS[2] >> (64 - shift)) + } else { + MODULUS[3] + }; + let pn2 = if shift > 0 { + (MODULUS[2] << shift) | (MODULUS[1] >> (64 - shift)) + } else { + MODULUS[2] + }; + + // Normalized dividend top two limbs: [1 << 63, 0] + // (original top limb 1<<(63-shift), shifted left by shift → 1<<63) + let dn4 = 1u64 << 63; + + // q_hat = floor((dn4 * 2^64) / pn3) + let dividend_top = (dn4 as u128) << 64; + let mut q = dividend_top / (pn3 as u128); + + // Knuth refinement: while q * pn2 > remainder * 2^64, decrement q + let mut r = dividend_top - q * (pn3 as u128); + while r < (1u128 << 64) && q * (pn2 as u128) > (r << 64) { + q -= 1; + r += pn3 as u128; + } + + q as u64 +}; + +/// 16384-entry lookup table mapping small integers to their Montgomery form. +const PRECOMP_TABLE_SIZE: usize = 1 << 14; + +/// `PRECOMP_TABLE[i]` = Montgomery form of `i` for BN254 Fr. +/// +/// Uses `Fp::new()` which converts standard form → Montgomery form at compile time. 
+#[allow(long_running_const_eval)] +static PRECOMP_TABLE: [Fr; PRECOMP_TABLE_SIZE] = { + let mut table: [Fr; PRECOMP_TABLE_SIZE] = + [Fp::new_unchecked(BigInt([0u64; N])); PRECOMP_TABLE_SIZE]; + let mut i = 1usize; + while i < PRECOMP_TABLE_SIZE { + let mut limbs = [0u64; N]; + limbs[0] = i as u64; + table[i] = Fp::new(BigInt::new(limbs)); + i += 1; + } + table +}; + +/// Pack (low_limb, [u64; N]) into BigInt<5>: low_limb at index 0, rest at 1..5 +#[inline(always)] +fn nplus1_from_low_and_high(low: u64, high: [u64; N]) -> BigInt<5> { + let mut limbs = [0u64; 5]; + limbs[0] = low; + limbs[1] = high[0]; + limbs[2] = high[1]; + limbs[3] = high[2]; + limbs[4] = high[3]; + BigInt(limbs) +} + +/// Pack ([u64; N], high_limb) into BigInt<5>: N limbs then high_limb +#[inline(always)] +fn nplus1_from_low_n_and_top(low_n: [u64; N], top: u64) -> BigInt<5> { + let mut limbs = [0u64; 5]; + limbs[0] = low_n[0]; + limbs[1] = low_n[1]; + limbs[2] = low_n[2]; + limbs[3] = low_n[3]; + limbs[4] = top; + BigInt(limbs) +} + +/// Conditional subtraction for Barrett reduction: reduce a 5-limb intermediate +/// that is known to be < 4p down to < p (4 limbs). +#[inline(always)] +fn barrett_cond_subtract(r_tmp: BigInt<5>) -> BigInt { + let (m2_lo, _m2_hi) = MODULUS_TIMES_2; + let (m3_lo, _m3_hi) = MODULUS_TIMES_3; + + // BN254 has MODULUS_NUM_SPARE_BITS = 2, so 2p and 3p both fit in N limbs. + // This means r_tmp.0[4] == 0 for all branches below. 
+ + // Compare with 2p (N-limb compare since spare_bits >= 1) + let r_n: [u64; N] = r_tmp.0[0..N].try_into().unwrap(); + + if compare_4(r_n, m2_lo) != core::cmp::Ordering::Less { + // r_tmp >= 2p + if compare_4(r_n, m3_lo) != core::cmp::Ordering::Less { + // r_tmp >= 3p → subtract 3p + BigInt(sub_4(r_n, m3_lo)) + } else { + // 2p <= r_tmp < 3p → subtract 2p + BigInt(sub_4(r_n, m2_lo)) + } + } else if compare_4(r_n, MODULUS) != core::cmp::Ordering::Less { + // p <= r_tmp < 2p → subtract p + BigInt(sub_4(r_n, MODULUS)) + } else { + // r_tmp < p → no subtraction + BigInt(r_n) + } +} + +/// Compare two 4-limb numbers (big-endian comparison) +#[inline(always)] +fn compare_4(a: [u64; N], b: [u64; N]) -> core::cmp::Ordering { + let mut i = N; + while i > 0 { + i -= 1; + if a[i] != b[i] { + return if a[i] > b[i] { + core::cmp::Ordering::Greater + } else { + core::cmp::Ordering::Less + }; + } + } + core::cmp::Ordering::Equal +} + +/// Subtract two 4-limb numbers: a - b. Caller guarantees a >= b. +#[inline(always)] +fn sub_4(a: [u64; N], b: [u64; N]) -> [u64; N] { + let mut result = a; + let mut borrow = 0u64; + borrow = sbb(&mut result[0], b[0], borrow); + borrow = sbb(&mut result[1], b[1], borrow); + borrow = sbb(&mut result[2], b[2], borrow); + let _ = sbb(&mut result[3], b[3], borrow); + result +} + +/// Barrett reduction kernel: reduce 5 limbs → 4 limbs (mod p). +/// +/// Input `c` is a BigInt<5>. Computes `c mod p` via one Barrett estimate step. 
+#[inline(always)] +fn barrett_reduce_5_to_4(c: BigInt<5>) -> BigInt { + // Compute tilde_c = floor(c / R') where R' = 2^modulus_bits + let tilde_c: u64 = if MODULUS_HAS_SPARE_BIT { + let high = c.0[N]; + let second_high = c.0[N - 1]; + (high << MODULUS_NUM_SPARE_BITS) + (second_high >> (64 - MODULUS_NUM_SPARE_BITS)) + } else { + c.0[N] + }; + + // Estimate m = floor(tilde_c * mu / 2^64) + let m: u64 = ((tilde_c as u128 * BARRETT_MU as u128) >> 64) as u64; + + // Compute m * 2p (result fits in 5 limbs) + let (m2p_lo, m2p_hi) = MODULUS_TIMES_2; + let mut m2p = nplus1_from_low_n_and_top(m2p_lo, m2p_hi); + // Multiply m2p by the scalar m in place + mul_bigint5_by_u64_in_place(&mut m2p, m); + + // Compute r_tmp = c - m * 2p + let mut r_tmp = c.0; + let mut borrow = 0u64; + for (r, &m) in r_tmp.iter_mut().zip(m2p.0.iter()) { + borrow = sbb(r, m, borrow); + } + debug_assert!(borrow == 0, "Borrow in Barrett c - m*2p"); + + barrett_cond_subtract(BigInt(r_tmp)) +} + +/// Multiply a BigInt<5> by a u64 scalar in place. +#[inline(always)] +fn mul_bigint5_by_u64_in_place(a: &mut BigInt<5>, b: u64) { + let mut carry = 0u64; + for limb in &mut a.0 { + let prod = (*limb as u128) * (b as u128) + (carry as u128); + *limb = prod as u64; + carry = (prod >> 64) as u64; + } + // Overflow is discarded (caller ensures result fits in 5 limbs) +} + +/// Barrett reduce an L-limb BigInt to a field element. +/// +/// Folds from high limb to low, applying the 5→4 kernel at each step. +#[inline(always)] +pub fn from_barrett_reduce(unreduced: BigInt) -> Fr { + debug_assert!(L >= N); + let mut acc = BigInt::([0u64; N]); + let mut i = L; + while i > 0 { + i -= 1; + let c5 = nplus1_from_low_and_high(unreduced.0[i], acc.0); + acc = barrett_reduce_5_to_4(c5); + } + Fp::new_unchecked(acc) +} + +/// Perform N Montgomery reduction steps on a mutable buffer of L >= 2N limbs. +/// Returns carry from the final step. 
+#[inline(always)] +#[allow(clippy::needless_range_loop)] +fn montgomery_reduce_in_place(limbs: &mut [u64; L]) -> u64 { + debug_assert!(L >= 2 * N); + let mut carry2 = 0u64; + for i in 0..N { + let tmp = limbs[i].wrapping_mul(INV); + let mut carry = 0u64; + // Discard low word: limbs[i] + tmp * MODULUS[0] → carry only + let _ = mac_with_carry(limbs[i], tmp, MODULUS[0], &mut carry); + for j in 1..N { + let k = i + j; + limbs[k] = mac_with_carry(limbs[k], tmp, MODULUS[j], &mut carry); + } + carry2 = adc(&mut limbs[i + N], carry, carry2); + } + carry2 +} + +/// Montgomery reduce an L-limb BigInt (L >= 2N) to a field element. +/// +/// For L > 2N, first folds the tail (indices N..L) via Barrett, then runs +/// the standard N-step Montgomery REDC. +#[inline(always)] +pub fn from_montgomery_reduce(unreduced: BigInt) -> Fr { + debug_assert!(L >= 2 * N, "montgomery_reduce requires L >= 2N"); + let mut buf = unreduced.0; + + // If L > 2N, fold excess high limbs down via Barrett + if L > 2 * N { + let mut acc = BigInt::([0u64; N]); + let mut i = L; + while i > N { + i -= 1; + let c5 = nplus1_from_low_and_high(buf[i], acc.0); + acc = barrett_reduce_5_to_4(c5); + } + buf[N..N + N].copy_from_slice(&acc.0); + for slot in &mut buf[2 * N..L] { + *slot = 0; + } + } + + let carry = montgomery_reduce_in_place(&mut buf); + + let mut result_limbs = [0u64; N]; + result_limbs.copy_from_slice(&buf[N..N + N]); + let mut result = Fp::new_unchecked(BigInt::(result_limbs)); + + // Final conditional subtraction + let needs_sub = if MODULUS_HAS_SPARE_BIT { + compare_4(result.0 .0, MODULUS) != core::cmp::Ordering::Less + } else { + carry != 0 || compare_4(result.0 .0, MODULUS) != core::cmp::Ordering::Less + }; + if needs_sub { + result.0 = BigInt(sub_4(result.0 .0, MODULUS)); + } + result +} + +/// Multiply BigInt<4> by u64, producing BigInt<5>. 
+#[inline(always)] +fn bigint4_mul_u64(a: &BigInt, b: u64) -> BigInt<5> { + let mut res = BigInt::<5>([0u64; 5]); + let mut carry = 0u64; + for i in 0..N { + res.0[i] = mac_with_carry(0, a.0[i], b, &mut carry); + } + res.0[N] = carry; + res +} + +/// Multiply BigInt<4> by u128, producing BigInt<6>. +#[inline(always)] +fn bigint4_mul_u128(a: &BigInt, b: u128) -> BigInt<6> { + if b == 0 { + return BigInt::<6>([0u64; 6]); + } + let b_lo = b as u64; + let b_hi = (b >> 64) as u64; + + let mut res = BigInt::<6>([0u64; 6]); + + // Pass 1: res += a * b_lo + let mut carry = 0u64; + for i in 0..N { + res.0[i] = mac_with_carry(res.0[i], a.0[i], b_lo, &mut carry); + } + res.0[N] = carry; + + // Pass 2: res[1..] += a * b_hi + let mut carry2 = 0u64; + for i in 0..N { + res.0[i + 1] = mac_with_carry(res.0[i + 1], a.0[i], b_hi, &mut carry2); + } + res.0[N + 1] = carry2; + + res +} + +/// Barrett reduce BigInt<5> → Fr (N+1 → field element) +#[inline(always)] +fn from_unchecked_nplus1(element: BigInt<5>) -> Fr { + let r = barrett_reduce_5_to_4(element); + Fp::new_unchecked(r) +} + +/// Barrett reduce BigInt<6> → Fr via two rounds +#[inline(always)] +fn from_unchecked_nplus2(element: BigInt<6>) -> Fr { + // Round 1: reduce top 5 limbs (indices 1..6) + let c1 = BigInt::<5>(element.0[1..6].try_into().unwrap()); + let r1 = barrett_reduce_5_to_4(c1); + + // Round 2: reduce [element[0], r1] + let c2 = nplus1_from_low_and_high(element.0[0], r1.0); + let r2 = barrett_reduce_5_to_4(c2); + Fp::new_unchecked(r2) +} + +/// Multiply a field element by u64. +#[inline(always)] +pub fn mul_u64(a: Fr, b: u64) -> Fr { + if b == 0 || Zero::is_zero(&a) { + return Fr::zero(); + } + if b == 1 { + return a; + } + let prod = bigint4_mul_u64(&a.0, b); + from_unchecked_nplus1(prod) +} + +/// Multiply a field element by i64. 
+#[inline(always)] +pub fn mul_i64(a: Fr, b: i64) -> Fr { + let abs = b.unsigned_abs(); + let res = mul_u64(a, abs); + if b < 0 { + -res + } else { + res + } +} + +/// Multiply a field element by u128. +#[inline(always)] +pub fn mul_u128(a: Fr, b: u128) -> Fr { + if b >> 64 == 0 { + mul_u64(a, b as u64) + } else { + let prod = bigint4_mul_u128(&a.0, b); + from_unchecked_nplus2(prod) + } +} + +/// Multiply a field element by i128. +#[inline(always)] +pub fn mul_i128(a: Fr, b: i128) -> Fr { + if b == 0 || Zero::is_zero(&a) { + return Fr::zero(); + } + if b == 1 { + return a; + } + let abs = b.unsigned_abs(); + let res = if abs <= u64::MAX as u128 { + mul_u64(a, abs as u64) + } else { + let prod = bigint4_mul_u128(&a.0, abs); + from_unchecked_nplus2(prod) + }; + if b < 0 { + -res + } else { + res + } +} + +/// Convert u64 → Fr using precomp table for small values, mul_u64(R, n) otherwise. +#[inline(always)] +pub fn from_u64(n: u64) -> Fr { + if (n as usize) < PRECOMP_TABLE_SIZE { + PRECOMP_TABLE[n as usize] + } else { + mul_u64(Fp::new_unchecked(R), n) + } +} + +/// Convert u128 → Fr using precomp table for small values, mul_u128(R, n) otherwise. +#[inline(always)] +pub fn from_u128(n: u128) -> Fr { + if n < PRECOMP_TABLE_SIZE as u128 { + PRECOMP_TABLE[n as usize] + } else { + mul_u128(Fp::new_unchecked(R), n) + } +} + +/// Multiply by a sparse RHS with exactly 2 non-zero high limbs at positions N-2 and N-1. +/// +/// This is used in the Challenge × Field hot path where the challenge value +/// has only its top 2 limbs set (128-bit challenge stored in high position). +/// +/// Interleaves multiplication with Montgomery reduction for efficiency. 
+#[inline(always)] +pub fn mul_by_hi_2limbs(a: Fr, limb_lo: u64, limb_hi: u64) -> Fr { + let a_limbs = a.0 .0; + let mut r = [0u64; N]; + + // Process limb at position N-2 (limb_lo), with interleaved Montgomery step + { + let mut carry1 = 0u64; + r[0] = mac_no_carry(r[0], a_limbs[0], limb_lo, &mut carry1); + let k = r[0].wrapping_mul(INV); + let mut carry2 = 0u64; + let _ = mac_no_carry(r[0], k, MODULUS[0], &mut carry2); + for j in 1..N { + let new_rj = mac_with_carry(r[j], a_limbs[j], limb_lo, &mut carry1); + let new_rj_minus_1 = mac_with_carry(new_rj, k, MODULUS[j], &mut carry2); + r[j] = new_rj; + r[j - 1] = new_rj_minus_1; + } + r[N - 1] = carry1.wrapping_add(carry2); + } + + // Process limb at position N-1 (limb_hi), with interleaved Montgomery step + { + let mut carry1 = 0u64; + r[0] = mac_no_carry(r[0], a_limbs[0], limb_hi, &mut carry1); + let k = r[0].wrapping_mul(INV); + let mut carry2 = 0u64; + let _ = mac_no_carry(r[0], k, MODULUS[0], &mut carry2); + for j in 1..N { + let new_rj = mac_with_carry(r[j], a_limbs[j], limb_hi, &mut carry1); + let new_rj_minus_1 = mac_with_carry(new_rj, k, MODULUS[j], &mut carry2); + r[j] = new_rj; + r[j - 1] = new_rj_minus_1; + } + r[N - 1] = carry1.wrapping_add(carry2); + } + + let mut out = Fp::new_unchecked(BigInt::(r)); + if compare_4(out.0 .0, MODULUS) != core::cmp::Ordering::Less { + out.0 = BigInt(sub_4(out.0 .0, MODULUS)); + } + out +} + +/// Wrap a raw BigInt<4> as Fr without any reduction (caller guarantees it's valid). +#[inline(always)] +pub fn from_bigint_unchecked(r: BigInt) -> Fr { + Fp::new_unchecked(r) +} + +/// Multiply `BigInt` by `u64` and accumulate into `BigInt<5>`. 
+#[inline(always)] +pub(crate) fn mul_u64_accumulate(acc: &mut BigInt<5>, a: &BigInt, b: u64) { + let mut carry = 0u64; + for i in 0..N { + acc.0[i] = mac_with_carry(acc.0[i], a.0[i], b, &mut carry); + } + let final_carry = adc(&mut acc.0[N], carry, 0); + debug_assert!(final_carry == 0, "overflow in mul_u64_accumulate"); +} + +#[cfg(test)] +mod tests { + use super::*; + use ark_ff::{PrimeField, UniformRand}; + use ark_std::test_rng; + use rand::Rng; + + #[test] + fn barrett_mu_sanity() { + assert_ne!(BARRETT_MU, 0); + } + + #[test] + fn modulus_times_2_correct() { + let (lo, hi) = MODULUS_TIMES_2; + // Verify 2*MODULUS by manual doubling + let mut expected = [0u64; N]; + let mut carry = 0u128; + for i in 0..N { + let doubled = (MODULUS[i] as u128) * 2 + carry; + expected[i] = doubled as u64; + carry = doubled >> 64; + } + assert_eq!(lo, expected); + assert_eq!(hi, carry as u64); + } + + #[test] + fn modulus_times_3_correct() { + let (lo, hi) = MODULUS_TIMES_3; + // Verify 3*MODULUS by tripling + let mut expected = [0u64; N]; + let mut carry = 0u128; + for i in 0..N { + let tripled = (MODULUS[i] as u128) * 3 + carry; + expected[i] = tripled as u64; + carry = tripled >> 64; + } + assert_eq!(lo, expected); + assert_eq!(hi, carry as u64); + } + + #[test] + fn precomp_table_spot_check() { + // PRECOMP_TABLE[i] should equal Montgomery form of i + assert_eq!(PRECOMP_TABLE[0], Fr::from(0u64)); + assert_eq!(PRECOMP_TABLE[1], Fr::from(1u64)); + assert_eq!(PRECOMP_TABLE[42], Fr::from(42u64)); + assert_eq!(PRECOMP_TABLE[16383], Fr::from(16383u64)); + } + + #[test] + fn from_u64_matches() { + let mut rng = test_rng(); + for _ in 0..200 { + let val: u64 = rng.gen(); + let expected = Fr::from(val); + let got = from_u64(val); + assert_eq!(got, expected, "from_u64 mismatch for {}", val); + } + assert_eq!(from_u64(0), Fr::from(0u64)); + assert_eq!(from_u64(1), Fr::from(1u64)); + assert_eq!(from_u64(u64::MAX), Fr::from(u64::MAX)); + } + + #[test] + fn from_u128_matches() { + let mut 
rng = test_rng(); + for _ in 0..200 { + let val: u128 = ((rng.gen::() as u128) << 64) | (rng.gen::() as u128); + let expected = { + let bigint = BigInt::new([val as u64, (val >> 64) as u64, 0, 0]); + Fr::from_bigint(bigint).unwrap() + }; + let got = from_u128(val); + assert_eq!(got, expected, "from_u128 mismatch for {}", val); + } + } + + #[test] + fn mul_u64_correct() { + let mut rng = test_rng(); + for _ in 0..200 { + let a = Fr::rand(&mut rng); + let b: u64 = rng.gen(); + let expected = a * Fr::from(b); + let got = mul_u64(a, b); + assert_eq!(got, expected, "mul_u64 mismatch: b={}", b); + } + // Edge cases + let a = Fr::rand(&mut rng); + assert_eq!(mul_u64(a, 0), Fr::zero()); + assert_eq!(mul_u64(a, 1), a); + } + + #[test] + fn mul_i64_correct() { + let mut rng = test_rng(); + for _ in 0..200 { + let a = Fr::rand(&mut rng); + let b: i64 = rng.gen(); + let expected = if b >= 0 { + a * Fr::from(b as u64) + } else { + -(a * Fr::from((-b) as u64)) + }; + let got = mul_i64(a, b); + assert_eq!(got, expected, "mul_i64 mismatch: b={}", b); + } + } + + #[test] + fn mul_u128_correct() { + let mut rng = test_rng(); + for _ in 0..200 { + let a = Fr::rand(&mut rng); + let b: u128 = ((rng.gen::() as u128) << 64) | (rng.gen::() as u128); + let b_fr = { + let bigint = BigInt::new([b as u64, (b >> 64) as u64, 0, 0]); + Fr::from_bigint(bigint).unwrap() + }; + let expected = a * b_fr; + let got = mul_u128(a, b); + assert_eq!(got, expected, "mul_u128 mismatch"); + } + } + + #[test] + fn mul_i128_correct() { + let mut rng = test_rng(); + for _ in 0..200 { + let a = Fr::rand(&mut rng); + let b: i128 = rng.gen(); + let abs_b = b.unsigned_abs(); + let b_fr = { + let bigint = BigInt::new([abs_b as u64, (abs_b >> 64) as u64, 0, 0]); + Fr::from_bigint(bigint).unwrap() + }; + let expected = if b >= 0 { a * b_fr } else { -(a * b_fr) }; + let got = mul_i128(a, b); + assert_eq!(got, expected, "mul_i128 mismatch"); + } + } + + #[test] + fn barrett_reduce_correct() { + let mut rng = test_rng(); 
+ // Barrett reduce of a product a*b should equal a*b in the field + for _ in 0..200 { + let a = Fr::rand(&mut rng); + let b = Fr::rand(&mut rng); + // Compute unreduced product in 8 limbs + let a_bigint = a.into_bigint(); + let b_bigint = b.into_bigint(); + let mut prod = BigInt::<8>::zero(); + for i in 0..N { + let mut carry = 0u64; + for j in 0..N { + prod.0[i + j] = + mac_with_carry(prod.0[i + j], a_bigint.0[i], b_bigint.0[j], &mut carry); + } + prod.0[i + N] = carry; + } + // Barrett reduce should give the same result as Montgomery reduce + // (both map from standard 8-limb → 4-limb Montgomery) + let reduced = from_barrett_reduce::<8>(prod); + // Verify it's a valid field element by roundtripping + let _ = reduced.into_bigint(); + } + // Barrett reduce of zero should give zero + assert_eq!(from_barrett_reduce::<5>(BigInt::<5>::zero()), Fr::zero()); + } + + #[test] + fn montgomery_reduce_roundtrip() { + let mut rng = test_rng(); + // Multiply the raw Montgomery-form BigInts: a_mont * b_mont = (aR)(bR). + // Montgomery reduce divides by R → abR = Montgomery form of a*b. 
+ for _ in 0..200 { + let a = Fr::rand(&mut rng); + let b = Fr::rand(&mut rng); + let expected = a * b; + + // Access internal Montgomery representation directly + let a_mont = (a.0).0; + let b_mont = (b.0).0; + let mut prod = BigInt::<8>::zero(); + for (i, &ai) in a_mont.iter().enumerate() { + let mut carry = 0u64; + for (j, &bj) in b_mont.iter().enumerate() { + prod.0[i + j] = mac_with_carry(prod.0[i + j], ai, bj, &mut carry); + } + prod.0[i + N] = carry; + } + let got = from_montgomery_reduce::<8>(prod); + assert_eq!(got, expected, "Montgomery reduce roundtrip mismatch"); + } + } + + #[test] + fn mul_by_hi_2limbs_correct() { + let mut rng = test_rng(); + for _ in 0..200 { + let a = Fr::rand(&mut rng); + let lo: u64 = rng.gen(); + let hi: u64 = rng.gen(); + // mul_by_hi_2limbs treats [0, 0, lo, hi] as a raw Montgomery-form scalar + let scalar = Fp::new_unchecked(BigInt::new([0, 0, lo, hi])); + let expected = a * scalar; + let got = mul_by_hi_2limbs(a, lo, hi); + assert_eq!( + got, expected, + "mul_by_hi_2limbs mismatch: lo={}, hi={}", + lo, hi + ); + } + } +} diff --git a/crates/jolt-field/src/arkworks/mod.rs b/crates/jolt-field/src/arkworks/mod.rs new file mode 100644 index 000000000..e1568ce77 --- /dev/null +++ b/crates/jolt-field/src/arkworks/mod.rs @@ -0,0 +1,10 @@ +//! Arkworks-backed field implementations. +//! +//! Provides the BN254 scalar field (`Fr`) and its low-level arithmetic +//! (Montgomery/Barrett reduction, precomputed lookup tables, sparse multiplication). 
+ +pub mod bn254; +#[allow(dead_code)] +pub(crate) mod bn254_ops; +pub mod montgomery_impl; +pub mod wide_accumulator; diff --git a/crates/jolt-field/src/arkworks/montgomery_impl.rs b/crates/jolt-field/src/arkworks/montgomery_impl.rs new file mode 100644 index 000000000..3945fc381 --- /dev/null +++ b/crates/jolt-field/src/arkworks/montgomery_impl.rs @@ -0,0 +1,129 @@ +use ark_bn254::FrConfig; +use ark_ff::MontConfig; + +use crate::{Fr, MontgomeryConstants}; + +// The u32 limbs are derived from arkworks' FrConfig u64 limbs by splitting +// each u64 into (lo, hi) u32 pairs. This matches the little-endian byte layout +// on ARM64 — the same bytes that represent [u64; 4] on CPU are read as [u32; 8] +// by the Metal shader. + +const MODULUS: [u64; 4] = >::MODULUS.0; +const R: [u64; 4] = >::R.0; +const R2: [u64; 4] = >::R2.0; +const INV64: u64 = >::INV; + +const fn u64s_to_u32s(limbs: &[u64; 4]) -> [u32; 8] { + [ + limbs[0] as u32, + (limbs[0] >> 32) as u32, + limbs[1] as u32, + (limbs[1] >> 32) as u32, + limbs[2] as u32, + (limbs[2] >> 32) as u32, + limbs[3] as u32, + (limbs[3] >> 32) as u32, + ] +} + +static MODULUS_U32: [u32; 8] = u64s_to_u32s(&MODULUS); +static R2_U32: [u32; 8] = u64s_to_u32s(&R2); +static ONE_U32: [u32; 8] = u64s_to_u32s(&R); + +/// `-r^{-1} mod 2^{32}` derived from the arkworks 64-bit `INV` value. +/// arkworks stores `-r^{-1} mod 2^{64}`; the low 32 bits give `mod 2^{32}`. +const INV32: u32 = INV64 as u32; + +impl MontgomeryConstants for Fr { + const NUM_U32_LIMBS: usize = 8; + const ACC_U32_LIMBS: usize = 18; // 2*8 + 2 + const FIELD_BYTE_SIZE: usize = 32; // 8 * 4 + + fn modulus_u32() -> &'static [u32] { + &MODULUS_U32 + } + + fn inv32() -> u32 { + INV32 + } + + fn r2_u32() -> &'static [u32] { + &R2_U32 + } + + fn one_u32() -> &'static [u32] { + &ONE_U32 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn bn254_modulus_matches_shader() { + // These are the constants from the original bn254_fr.metal shader. 
+ let expected: [u32; 8] = [ + 0xf000_0001, + 0x43e1_f593, + 0x79b9_7091, + 0x2833_e848, + 0x8181_585d, + 0xb850_45b6, + 0xe131_a029, + 0x3064_4e72, + ]; + assert_eq!(MODULUS_U32, expected); + } + + #[test] + fn bn254_inv32_matches_shader() { + assert_eq!(INV32, 0xefff_ffff); + } + + #[test] + fn bn254_r2_matches_shader() { + let expected: [u32; 8] = [ + 0xae21_6da7, + 0x1bb8_e645, + 0xe35c_59e3, + 0x53fe_3ab1, + 0x53bb_8085, + 0x8c49_833d, + 0x7f4e_44a5, + 0x0216_d0b1, + ]; + assert_eq!(R2_U32, expected); + } + + #[test] + fn bn254_one_matches_shader() { + let expected: [u32; 8] = [ + 0x4fff_fffb, + 0xac96_341c, + 0x9f60_cd29, + 0x36fc_7695, + 0x7879_462e, + 0x666e_a36f, + 0x9a07_df2f, + 0x0e0a_77c1, + ]; + assert_eq!(ONE_U32, expected); + } + + #[test] + fn acc_limbs_invariant() { + assert_eq!( + ::ACC_U32_LIMBS, + 2 * ::NUM_U32_LIMBS + 2 + ); + } + + #[test] + fn field_byte_size_invariant() { + assert_eq!( + ::FIELD_BYTE_SIZE, + ::NUM_U32_LIMBS * 4 + ); + } +} diff --git a/crates/jolt-field/src/arkworks/wide_accumulator.rs b/crates/jolt-field/src/arkworks/wide_accumulator.rs new file mode 100644 index 000000000..befa677ad --- /dev/null +++ b/crates/jolt-field/src/arkworks/wide_accumulator.rs @@ -0,0 +1,113 @@ +//! Wide-integer accumulator for BN254 Fr deferred reduction. +//! +//! Accumulates `sum += a * b` as 9-limb (576-bit) schoolbook products, +//! deferring the Montgomery reduction to a single call at the end. +//! +//! # Capacity +//! +//! Each Fr element is 4 limbs (256 bits). The unreduced product of two +//! elements is 8 limbs (512 bits). A 9-limb accumulator (576 bits) can +//! hold up to 2^64 such products without overflow. + +use crate::accumulator::FieldAccumulator; +use crate::arkworks::bn254::Fr; +use crate::Limbs; + +use super::bn254_ops; + +/// Wide 9-limb accumulator for BN254 Fr deferred reduction. +/// +/// Stores the running sum of Montgomery-form products as a 576-bit integer. 
+/// Converting to a field element requires a single Montgomery reduction +/// via [`reduce`](FieldAccumulator::reduce). +#[derive(Clone, Copy)] +pub struct WideAccumulator { + /// 9 limbs = 2×4 (product width) + 1 (addition headroom). + limbs: Limbs<9>, +} + +impl Default for WideAccumulator { + #[inline] + fn default() -> Self { + Self { + limbs: Limbs::zero(), + } + } +} + +impl FieldAccumulator for WideAccumulator { + type Field = Fr; + + #[inline(always)] + fn fmadd(&mut self, a: Fr, b: Fr) { + self.limbs.fmadd::<4, 4>(&a.inner_limbs(), &b.inner_limbs()); + } + + #[inline(always)] + fn merge(&mut self, other: Self) { + self.limbs.add_assign_trunc::<9>(&other.limbs); + } + + fn reduce(self) -> Fr { + // The accumulator holds sum_i (a_i_mont × b_i_mont). + // Montgomery reduction divides by R, yielding the Montgomery form + // of sum_i (a_i × b_i). + let bigint = self.limbs.to_bigint(); + Fr::from_inner(bn254_ops::from_montgomery_reduce(bigint)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::Field; + + #[test] + fn single_fmadd() { + let a = Fr::from_u64(7); + let b = Fr::from_u64(6); + let mut acc = WideAccumulator::default(); + acc.fmadd(a, b); + assert_eq!(acc.reduce(), Fr::from_u64(42)); + } + + #[test] + fn multiple_fmadd() { + let mut acc = WideAccumulator::default(); + acc.fmadd(Fr::from_u64(3), Fr::from_u64(4)); + acc.fmadd(Fr::from_u64(5), Fr::from_u64(6)); + // 3*4 + 5*6 = 12 + 30 = 42 + assert_eq!(acc.reduce(), Fr::from_u64(42)); + } + + #[test] + fn merge_two_accumulators() { + let mut acc1 = WideAccumulator::default(); + acc1.fmadd(Fr::from_u64(10), Fr::from_u64(10)); + + let mut acc2 = WideAccumulator::default(); + acc2.fmadd(Fr::from_u64(20), Fr::from_u64(20)); + + acc1.merge(acc2); + // 10*10 + 20*20 = 100 + 400 = 500 + assert_eq!(acc1.reduce(), Fr::from_u64(500)); + } + + #[test] + fn empty_reduces_to_zero() { + let acc = WideAccumulator::default(); + assert_eq!(acc.reduce(), Fr::from_u64(0)); + } + + #[test] + fn 
large_accumulation() { + let mut acc = WideAccumulator::default(); + let n = 10_000u64; + let a = Fr::from_u64(1); + let b = Fr::from_u64(1); + for _ in 0..n { + acc.fmadd(a, b); + } + assert_eq!(acc.reduce(), Fr::from_u64(n)); + } +} diff --git a/crates/jolt-field/src/dory_interop.rs b/crates/jolt-field/src/dory_interop.rs new file mode 100644 index 000000000..371e08ec2 --- /dev/null +++ b/crates/jolt-field/src/dory_interop.rs @@ -0,0 +1,172 @@ +use crate::arkworks::bn254::Fr; +use crate::Field; + +use ark_serialize::CanonicalDeserialize; +use ark_serialize::CanonicalSerialize; +use dory::primitives::arithmetic; +use dory::primitives::serialization::{ + Compress, DoryDeserialize, DorySerialize, SerializationError, Valid, Validate, +}; +use rand_core::OsRng; +use std::io::{Read, Write}; + +type InnerFr = ark_bn254::Fr; + +#[inline(always)] +fn to_ark_compress(c: Compress) -> ark_serialize::Compress { + match c { + Compress::Yes => ark_serialize::Compress::Yes, + Compress::No => ark_serialize::Compress::No, + } +} + +#[inline(always)] +fn to_ark_validate(v: Validate) -> ark_serialize::Validate { + match v { + Validate::Yes => ark_serialize::Validate::Yes, + Validate::No => ark_serialize::Validate::No, + } +} + +fn ark_err_to_dory(e: ark_serialize::SerializationError) -> SerializationError { + match e { + ark_serialize::SerializationError::IoError(io) => SerializationError::IoError(io), + ark_serialize::SerializationError::InvalidData => { + SerializationError::InvalidData("arkworks: invalid data".into()) + } + ark_serialize::SerializationError::UnexpectedFlags => SerializationError::UnexpectedData, + ark_serialize::SerializationError::NotEnoughSpace => { + SerializationError::InvalidData("arkworks: not enough space".into()) + } + } +} + +impl Valid for Fr { + fn check(&self) -> Result<(), SerializationError> { + ark_serialize::Valid::check(&self.0).map_err(ark_err_to_dory) + } +} + +impl DorySerialize for Fr { + fn serialize_with_mode( + &self, + writer: W, + 
compress: Compress, + ) -> Result<(), SerializationError> { + self.0 + .serialize_with_mode(writer, to_ark_compress(compress)) + .map_err(ark_err_to_dory) + } + + fn serialized_size(&self, compress: Compress) -> usize { + self.0.serialized_size(to_ark_compress(compress)) + } +} + +impl DoryDeserialize for Fr { + fn deserialize_with_mode( + reader: R, + compress: Compress, + validate: Validate, + ) -> Result { + InnerFr::deserialize_with_mode(reader, to_ark_compress(compress), to_ark_validate(validate)) + .map(Fr) + .map_err(ark_err_to_dory) + } +} + +impl arithmetic::Field for Fr { + #[inline(always)] + fn zero() -> Self { + Field::from_u64(0) + } + + #[inline(always)] + fn one() -> Self { + Field::from_u64(1) + } + + #[inline(always)] + fn is_zero(&self) -> bool { + *self == ::zero() + } + + #[inline(always)] + fn add(&self, rhs: &Self) -> Self { + *self + *rhs + } + + #[inline(always)] + fn sub(&self, rhs: &Self) -> Self { + *self - *rhs + } + + #[inline(always)] + fn mul(&self, rhs: &Self) -> Self { + *self * *rhs + } + + #[inline(always)] + fn inv(self) -> Option { + Field::inverse(&self) + } + + #[inline] + fn random() -> Self { + Field::random(&mut OsRng) + } + + #[inline(always)] + fn from_u64(val: u64) -> Self { + Field::from_u64(val) + } + + #[inline(always)] + fn from_i64(val: i64) -> Self { + Field::from_i64(val) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use dory::primitives::arithmetic::Field as DoryField; + + #[test] + fn field_zero_one() { + let zero = ::zero(); + let one = ::one(); + assert!(DoryField::is_zero(&zero)); + assert!(!DoryField::is_zero(&one)); + assert_eq!(DoryField::add(&zero, &one), one); + assert_eq!(DoryField::mul(&zero, &one), zero); + } + + #[test] + fn field_add_sub_mul() { + let a = ::from_u64(7); + let b = ::from_u64(11); + assert_eq!(DoryField::add(&a, &b), ::from_u64(18)); + assert_eq!(DoryField::sub(&b, &a), ::from_u64(4)); + assert_eq!(DoryField::mul(&a, &b), ::from_u64(77)); + } + + #[test] + fn field_inv() { + 
let a = ::from_u64(42); + let inv_a = DoryField::inv(a).expect("nonzero element must have inverse"); + assert_eq!(DoryField::mul(&a, &inv_a), ::one()); + + let zero = ::zero(); + assert!(DoryField::inv(zero).is_none()); + } + + #[test] + fn serialization_roundtrip() { + let val = ::from_u64(123_456_789); + let mut buf = Vec::new(); + DorySerialize::serialize_compressed(&val, &mut buf).unwrap(); + let recovered: Fr = DoryDeserialize::deserialize_compressed(&buf[..]).unwrap(); + assert_eq!(val, recovered); + } +} diff --git a/crates/jolt-field/src/field.rs b/crates/jolt-field/src/field.rs new file mode 100644 index 000000000..afd885430 --- /dev/null +++ b/crates/jolt-field/src/field.rs @@ -0,0 +1,184 @@ +#[cfg(feature = "allocative")] +use allocative::Allocative; +use num_traits::{One, Zero}; +use rand_core::RngCore; +use serde::{Deserialize, Serialize}; +use std::fmt::{Debug, Display}; +use std::hash::Hash; +use std::ops::{Add, AddAssign, Div, Mul, MulAssign, Neg, Sub, SubAssign}; + +/// Prime field element abstraction used throughout Jolt. +/// +/// This trait provides a backend-agnostic interface over a prime-order scalar +/// field. +/// +/// All arithmetic is modular over the field's prime order. Elements are `Copy`, +/// thread-safe, and cheaply serializable. Negative integers are mapped via +/// their canonical representative modulo `p`. +pub trait Field: + 'static + + Sized + + Zero + + One + + Neg + + Add + + for<'a> Add<&'a Self, Output = Self> + + Sub + + for<'a> Sub<&'a Self, Output = Self> + + Mul + + for<'a> Mul<&'a Self, Output = Self> + + Div + + for<'a> Div<&'a Self, Output = Self> + + AddAssign + + SubAssign + + MulAssign + + core::iter::Sum + + for<'a> core::iter::Sum<&'a Self> + + core::iter::Product + + for<'a> core::iter::Product<&'a Self> + + Eq + + Copy + + Sync + + Send + + Display + + Debug + + Default + + Hash + + Serialize + + for<'de> Deserialize<'de> + + MaybeAllocative +{ + /// Accumulator for deferred-reduction fused multiply-add. 
+ /// + /// For BN254 Fr, this is a wide 9-limb integer that defers Montgomery + /// reduction. For other fields, use [`NaiveAccumulator`](crate::NaiveAccumulator). + type Accumulator: crate::FieldAccumulator; + + /// Byte length of a canonical (compressed) serialized element. + const NUM_BYTES: usize; + + /// Serializes to compressed canonical form (little-endian, 32 bytes). + fn to_bytes(&self) -> [u8; 32]; + + /// Samples a uniformly random field element. + fn random(rng: &mut R) -> Self; + /// Deserializes from little-endian bytes, reducing modulo the field prime. + fn from_bytes(bytes: &[u8]) -> Self; + /// Returns the value as `u64` if it fits, or `None` if >= 2^64. + fn to_u64(&self) -> Option; + /// Number of significant bits in the canonical representation. + fn num_bits(&self) -> u32; + /// Returns `self * self`. + fn square(&self) -> Self; + /// Multiplicative inverse, or `None` for the zero element. + fn inverse(&self) -> Option; + + fn from_bool(val: bool) -> Self { + if val { + Self::one() + } else { + Self::zero() + } + } + fn from_u8(n: u8) -> Self { + Self::from_u64(n as u64) + } + fn from_u16(n: u16) -> Self { + Self::from_u64(n as u64) + } + fn from_u32(n: u32) -> Self { + Self::from_u64(n as u64) + } + fn from_u64(n: u64) -> Self; + /// Maps a signed integer to its canonical field representative: negative + /// values become `p - |val|`. + fn from_i64(val: i64) -> Self; + /// Maps a signed integer to its canonical field representative: negative + /// values become `p - |val|`. + fn from_i128(val: i128) -> Self; + fn from_u128(val: u128) -> Self; + + fn mul_u64(&self, n: u64) -> Self { + *self * Self::from_u64(n) + } + + fn mul_i64(&self, n: i64) -> Self { + *self * Self::from_i64(n) + } + + fn mul_u128(&self, n: u128) -> Self { + *self * Self::from_u128(n) + } + + fn mul_i128(&self, n: i128) -> Self { + *self * Self::from_i128(n) + } + + /// Multiplication of a field element and a power of 2. 
+ /// Split into chunks of 63 bits, then multiply and accumulate. + fn mul_pow_2(&self, mut pow: usize) -> Self { + assert!(pow <= 255, "pow > 255"); + let mut res = *self; + while pow >= 64 { + res = res.mul_u64(1 << 63); + pow -= 63; + } + res.mul_u64(1 << pow) + } +} + +#[cfg(feature = "allocative")] +pub trait MaybeAllocative: Allocative {} +#[cfg(feature = "allocative")] +impl MaybeAllocative for T {} +#[cfg(not(feature = "allocative"))] +pub trait MaybeAllocative {} +#[cfg(not(feature = "allocative"))] +impl MaybeAllocative for T {} + +/// Multiplication with fast-path short-circuits for zero and one. +/// +/// In sumcheck hot loops many evaluations multiply by 0 or 1. +/// These methods avoid the full Montgomery multiplication in those cases. +pub trait OptimizedMul: Sized + Mul { + /// Returns `zero()` immediately if either operand is zero. + fn mul_0_optimized(self, other: Rhs) -> Self::Output; + /// Returns the other operand immediately if either is one. + fn mul_1_optimized(self, other: Rhs) -> Self::Output; + /// Combined: short-circuits on both zero and one. + fn mul_01_optimized(self, other: Rhs) -> Self::Output; +} + +impl OptimizedMul for F +where + F: Field, +{ + #[inline(always)] + fn mul_0_optimized(self, other: F) -> F { + if self.is_zero() || other.is_zero() { + Self::zero() + } else { + self * other + } + } + + #[inline(always)] + fn mul_1_optimized(self, other: F) -> F { + if self.is_one() { + other + } else if other.is_one() { + self + } else { + self * other + } + } + + #[inline(always)] + fn mul_01_optimized(self, other: F) -> F { + if self.is_zero() || other.is_zero() { + Self::zero() + } else { + self.mul_1_optimized(other) + } + } +} diff --git a/crates/jolt-field/src/lib.rs b/crates/jolt-field/src/lib.rs new file mode 100644 index 000000000..e7196b2bd --- /dev/null +++ b/crates/jolt-field/src/lib.rs @@ -0,0 +1,26 @@ +//! Field abstractions for the Jolt zkVM +//! +//! 
This crate provides the core field trait (`Field`) and associated types +//! used throughout the Jolt zkVM ecosystem. + +mod field; +pub use field::{Field, MaybeAllocative, OptimizedMul}; +mod accumulator; +pub use accumulator::{FieldAccumulator, NaiveAccumulator}; +mod montgomery_constants; +pub use montgomery_constants::MontgomeryConstants; + +pub mod limbs; +pub use limbs::Limbs; + +pub mod signed; + +#[cfg(feature = "bn254")] +pub mod arkworks; +#[cfg(feature = "bn254")] +pub use arkworks::bn254::Fr; +#[cfg(feature = "bn254")] +pub use arkworks::wide_accumulator::WideAccumulator; + +#[cfg(feature = "dory-pcs")] +mod dory_interop; diff --git a/crates/jolt-field/src/limbs.rs b/crates/jolt-field/src/limbs.rs new file mode 100644 index 000000000..755796c2b --- /dev/null +++ b/crates/jolt-field/src/limbs.rs @@ -0,0 +1,537 @@ +//! Fixed-width limb array for multi-precision arithmetic. +//! +//! [`Limbs`] is a `#[repr(transparent)]` newtype over `[u64; N]` that +//! decouples the public API from `ark_ff::BigInt`. All truncated arithmetic +//! previously on `BigIntExt` lives here as inherent methods. + +use ark_ff::BigInt; +use core::cmp::Ordering; + +/// Fixed-width array of `N` 64-bit limbs in little-endian order. +/// +/// Used as the magnitude type for [`SignedBigInt`](crate::signed::SignedBigInt) +/// and as the output of truncated multiplication in unreduced arithmetic. +/// Provides the same multi-precision operations that `BigIntExt` offered on +/// `ark_ff::BigInt`, without leaking arkworks types into the public API. 
+#[derive(Clone, Copy, PartialEq, Eq, Hash)] +#[repr(transparent)] +pub struct Limbs(pub [u64; N]); + +impl Default for Limbs { + #[inline] + fn default() -> Self { + Self([0u64; N]) + } +} + +impl Limbs { + #[inline] + pub const fn new(limbs: [u64; N]) -> Self { + Self(limbs) + } + + #[inline] + pub const fn zero() -> Self { + Self([0u64; N]) + } + + #[inline] + pub fn is_zero(&self) -> bool { + self.0.iter().all(|&l| l == 0) + } + + /// Number of significant bits in the value. + #[inline] + pub fn num_bits(&self) -> u32 { + let mut i = N; + while i > 0 { + i -= 1; + if self.0[i] != 0 { + return (i as u32) * 64 + (64 - self.0[i].leading_zeros()); + } + } + 0 + } + + /// Constructs from a single `u64`, placed in the lowest limb. + #[inline] + pub fn from_u64(val: u64) -> Self { + let mut limbs = [0u64; N]; + if N > 0 { + limbs[0] = val; + } + Self(limbs) + } + + #[inline] + pub(crate) fn to_bigint(self) -> BigInt { + BigInt(self.0) + } + + /// In-place addition with carry propagation. + /// Returns `true` if the final carry overflowed. + #[inline] + pub fn add_with_carry(&mut self, other: &Self) -> bool { + let mut carry = 0u64; + for i in 0..N { + let sum = (self.0[i] as u128) + (other.0[i] as u128) + (carry as u128); + self.0[i] = sum as u64; + carry = (sum >> 64) as u64; + } + carry != 0 + } + + /// In-place subtraction with borrow propagation. + /// Returns `true` if the final borrow underflowed. + #[inline] + pub fn sub_with_borrow(&mut self, other: &Self) -> bool { + let mut borrow = false; + for i in 0..N { + let (d1, b1) = self.0[i].overflowing_sub(other.0[i]); + let (d2, b2) = d1.overflowing_sub(u64::from(borrow)); + self.0[i] = d2; + borrow = b1 || b2; + } + borrow + } + + /// Truncated multiplication: `self * other`, keeping the low `P` limbs. + #[inline(always)] + pub fn mul_trunc(&self, other: &Limbs) -> Limbs
<P>
{ + let mut res = Limbs::
<P>
::zero(); + fm_limbs_into::(&self.0, &other.0, &mut res.0); + res + } + + /// Truncated addition: `self + other`, keeping the low `P` limbs. + #[inline] + pub fn add_trunc(&self, other: &Limbs) -> Limbs
<P>
{ + let mut acc = Limbs::
<P>
::zero(); + let copy_len = if P < N { P } else { N }; + acc.0[..copy_len].copy_from_slice(&self.0[..copy_len]); + acc.add_assign_trunc::(other); + acc + } + + /// Truncated subtraction: `self - other`, keeping the low `P` limbs. + #[inline] + pub fn sub_trunc(&self, other: &Limbs) -> Limbs
<P>
{ + let mut acc = Limbs::
<P>
::zero(); + let copy_len = if P < N { P } else { N }; + acc.0[..copy_len].copy_from_slice(&self.0[..copy_len]); + acc.sub_assign_trunc::(other); + acc + } + + /// In-place truncated addition: `self += other`, keeping `N` limbs. + #[inline] + pub fn add_assign_trunc(&mut self, other: &Limbs) { + debug_assert!(M <= N, "add_assign_trunc: right operand wider than self"); + let mut carry = 0u64; + for i in 0..N { + let rhs = if i < M { other.0[i] } else { 0 }; + let sum = (self.0[i] as u128) + (rhs as u128) + (carry as u128); + self.0[i] = sum as u64; + carry = (sum >> 64) as u64; + } + } + + /// In-place truncated subtraction: `self -= other`, keeping `N` limbs. + #[inline] + pub fn sub_assign_trunc(&mut self, other: &Limbs) { + debug_assert!(M <= N, "sub_assign_trunc: right operand wider than self"); + let mut borrow = 0u64; + for i in 0..N { + let rhs = if i < M { other.0[i] } else { 0 }; + let diff = (self.0[i] as u128) + .wrapping_sub(rhs as u128) + .wrapping_sub(borrow as u128); + self.0[i] = diff as u64; + borrow = u64::from(diff > u64::MAX as u128); + } + } + + /// Truncated fused multiply-add: `self += a * b`, keeping `N` limbs. + /// + /// WARNING: The carry at the spill position is NOT fully propagated. + /// Use [`fmadd`](Self::fmadd) if many products will be accumulated and intermediate + /// limbs may overflow. + #[inline] + pub fn fmadd_trunc(&mut self, a: &Limbs, b: &Limbs) { + let i_limit = if A < N { A } else { N }; + for i in 0..i_limit { + let mut carry = 0u64; + let j_limit = if B < (N - i) { B } else { N - i }; + for j in 0..j_limit { + let idx = i + j; + let prod = + (a.0[i] as u128) * (b.0[j] as u128) + (self.0[idx] as u128) + (carry as u128); + self.0[idx] = prod as u64; + carry = (prod >> 64) as u64; + } + let spill = i + j_limit; + if spill < N { + let (new_val, _) = self.0[spill].overflowing_add(carry); + self.0[spill] = new_val; + } + } + } + + /// Fused multiply-add: `self += a * b`, keeping `N` limbs, with full carry propagation. 
+ /// + /// Unlike [`fmadd_trunc`](Self::fmadd_trunc), the carry from each row's + /// spill position is propagated through all remaining higher limbs. + /// This is required when accumulating many products to avoid silent overflow. + #[inline] + pub fn fmadd(&mut self, a: &Limbs, b: &Limbs) { + let i_limit = if A < N { A } else { N }; + for i in 0..i_limit { + let mut carry = 0u64; + let j_limit = if B < (N - i) { B } else { N - i }; + for j in 0..j_limit { + let idx = i + j; + let prod = + (a.0[i] as u128) * (b.0[j] as u128) + (self.0[idx] as u128) + (carry as u128); + self.0[idx] = prod as u64; + carry = (prod >> 64) as u64; + } + let mut k = i + j_limit; + while carry != 0 && k < N { + let sum = (self.0[k] as u128) + (carry as u128); + self.0[k] = sum as u64; + carry = (sum >> 64) as u64; + k += 1; + } + } + } + + /// Multiply and keep only the low `N` limbs (same width as self). + #[inline(always)] + pub fn mul_low(&self, other: &Self) -> Self { + let mut res = Limbs::::zero(); + fm_limbs_into::(&self.0, &other.0, &mut res.0); + res + } + + /// Zero-extend a narrower `Limbs` into `Limbs`. 
+ #[inline] + pub fn zero_extend_from(smaller: &Limbs) -> Limbs { + debug_assert!( + M <= N, + "cannot zero-extend: source has more limbs than destination" + ); + let mut limbs = [0u64; N]; + let copy_len = if M < N { M } else { N }; + limbs[..copy_len].copy_from_slice(&smaller.0[..copy_len]); + Limbs(limbs) + } +} + +impl From for Limbs { + #[inline] + fn from(val: u64) -> Self { + Self::from_u64(val) + } +} + +impl From> for Limbs { + #[inline] + fn from(bigint: BigInt) -> Self { + Limbs(bigint.0) + } +} + +impl From> for BigInt { + #[inline] + fn from(limbs: Limbs) -> Self { + BigInt(limbs.0) + } +} + +impl AsRef<[u64]> for Limbs { + #[inline] + fn as_ref(&self) -> &[u64] { + &self.0 + } +} + +impl PartialOrd for Limbs { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Limbs { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + let mut i = N; + while i > 0 { + i -= 1; + match self.0[i].cmp(&other.0[i]) { + Ordering::Equal => {} + ord => return ord, + } + } + Ordering::Equal + } +} + +impl core::fmt::Debug for Limbs { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "Limbs([")?; + for (i, limb) in self.0.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{limb:#018x}")?; + } + write!(f, "])") + } +} + +impl core::fmt::Display for Limbs { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut started = false; + for &limb in self.0.iter().rev() { + if !started { + if limb != 0 { + write!(f, "{limb:x}")?; + started = true; + } + } else { + write!(f, "{limb:016x}")?; + } + } + if !started { + write!(f, "0")?; + } + Ok(()) + } +} + +#[cfg(feature = "allocative")] +impl allocative::Allocative for Limbs { + fn visit<'a, 'b: 'a>(&self, visitor: &'a mut allocative::Visitor<'b>) { + visitor.visit_simple_sized::(); + } +} + +impl ark_serialize::CanonicalSerialize for Limbs { + #[inline] + fn serialize_with_mode( + &self, + 
writer: W, + compress: ark_serialize::Compress, + ) -> Result<(), ark_serialize::SerializationError> { + self.to_bigint().serialize_with_mode(writer, compress) + } + + #[inline] + fn serialized_size(&self, compress: ark_serialize::Compress) -> usize { + self.to_bigint().serialized_size(compress) + } +} + +impl ark_serialize::Valid for Limbs { + #[inline] + fn check(&self) -> Result<(), ark_serialize::SerializationError> { + self.to_bigint().check() + } +} + +impl ark_serialize::CanonicalDeserialize for Limbs { + #[inline] + fn deserialize_with_mode( + reader: R, + compress: ark_serialize::Compress, + validate: ark_serialize::Validate, + ) -> Result { + BigInt::::deserialize_with_mode(reader, compress, validate).map(Limbs::from) + } +} + +/// Core schoolbook multiplication accumulator. +/// +/// Computes `acc += a[0..N] * b[0..M]`, keeping only the low `P` limbs. +#[inline(always)] +fn fm_limbs_into( + a: &[u64; N], + b: &[u64; M], + acc: &mut [u64; P], +) { + for (j, &mul_limb) in b.iter().enumerate() { + if mul_limb == 0 { + continue; + } + let base = j; + let mut carry = 0u64; + for (i, &a_limb) in a.iter().enumerate() { + let idx = base + i; + if idx < P { + let prod = + (a_limb as u128) * (mul_limb as u128) + (acc[idx] as u128) + (carry as u128); + acc[idx] = prod as u64; + carry = (prod >> 64) as u64; + } + } + let next = base + N; + if next < P { + let (v, _) = acc[next].overflowing_add(carry); + acc[next] = v; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn mul_trunc_small() { + let a = Limbs::<1>([7u64]); + let b = Limbs::<1>([6u64]); + let c: Limbs<1> = a.mul_trunc::<1, 1>(&b); + assert_eq!(c.0[0], 42); + } + + #[test] + fn mul_trunc_wider_output() { + let a = Limbs::<1>([u64::MAX]); + let b = Limbs::<1>([2u64]); + let c: Limbs<2> = a.mul_trunc::<1, 2>(&b); + let expected = (u64::MAX as u128) * 2; + assert_eq!(c.0[0], expected as u64); + assert_eq!(c.0[1], (expected >> 64) as u64); + } + + #[test] + fn add_trunc_basic() { + let a = 
Limbs::<2>([u64::MAX, 0]); + let b = Limbs::<2>([1u64, 0]); + let c: Limbs<2> = a.add_trunc::<2, 2>(&b); + assert_eq!(c.0[0], 0); + assert_eq!(c.0[1], 1); + } + + #[test] + fn sub_trunc_basic() { + let a = Limbs::<2>([0, 1]); + let b = Limbs::<2>([1, 0]); + let c: Limbs<2> = a.sub_trunc::<2, 2>(&b); + assert_eq!(c.0[0], u64::MAX); + assert_eq!(c.0[1], 0); + } + + #[test] + fn zero_extend() { + let small = Limbs::<1>([42u64]); + let big: Limbs<4> = Limbs::<4>::zero_extend_from::<1>(&small); + assert_eq!(big.0[0], 42); + assert_eq!(big.0[1], 0); + assert_eq!(big.0[2], 0); + assert_eq!(big.0[3], 0); + } + + #[test] + fn mul_low_basic() { + let a = Limbs::<2>([3, 0]); + let b = Limbs::<2>([5, 0]); + let c = a.mul_low(&b); + assert_eq!(c.0[0], 15); + assert_eq!(c.0[1], 0); + } + + #[test] + fn fmadd_trunc_basic() { + let a = Limbs::<1>([3u64]); + let b = Limbs::<1>([4u64]); + let mut acc = Limbs::<2>([10, 0]); + acc.fmadd_trunc::<1, 1>(&a, &b); + assert_eq!(acc.0[0], 22); // 10 + 3*4 + } + + #[test] + fn fmadd_basic() { + let a = Limbs::<1>([3u64]); + let b = Limbs::<1>([4u64]); + let mut acc = Limbs::<2>([10, 0]); + acc.fmadd::<1, 1>(&a, &b); + assert_eq!(acc.0[0], 22); // 10 + 3*4 + } + + #[test] + fn fmadd_carry_propagation() { + // Accumulate many large products into a wide accumulator. + // Use fmadd (full carry) vs add_with_carry reference to verify correctness. 
+ let a = Limbs::<2>([u64::MAX, u64::MAX >> 1]); + let b = Limbs::<2>([u64::MAX, u64::MAX >> 1]); + + // Compute a single product via mul_trunc as reference + let single_product: Limbs<5> = a.mul_trunc::<2, 5>(&b); + + let mut acc = Limbs::<5>::zero(); + let count = 10_000u64; + for _ in 0..count { + acc.fmadd::<2, 2>(&a, &b); + } + + // Build expected: single_product * count via repeated addition + let mut expected = Limbs::<5>::zero(); + // Multiply single_product by count using schoolbook with u64 scalar + let mut carry = 0u128; + for i in 0..5 { + let prod = (single_product.0[i] as u128) * (count as u128) + carry; + expected.0[i] = prod as u64; + carry = prod >> 64; + } + + assert_eq!( + acc, expected, + "fmadd should match reference after {count} products" + ); + } + + #[test] + fn add_sub_with_carry_borrow() { + let mut a = Limbs::<2>([u64::MAX, 0]); + let b = Limbs::<2>([1, 0]); + let carry = a.add_with_carry(&b); + assert!(!carry); + assert_eq!(a.0[0], 0); + assert_eq!(a.0[1], 1); + + let borrow = a.sub_with_borrow(&b); + assert!(!borrow); + assert_eq!(a.0[0], u64::MAX); + assert_eq!(a.0[1], 0); + } + + #[test] + fn ordering() { + let a = Limbs::<2>([0, 1]); + let b = Limbs::<2>([u64::MAX, 0]); + assert!(a > b); + assert_eq!(a.cmp(&a), Ordering::Equal); + } + + #[test] + fn bigint_roundtrip() { + let limbs = Limbs::<4>([1, 2, 3, 4]); + let bigint: BigInt<4> = limbs.into(); + let back: Limbs<4> = bigint.into(); + assert_eq!(limbs, back); + } + + #[test] + fn display_formatting() { + let z = Limbs::<2>([0, 0]); + assert_eq!(format!("{z}"), "0"); + + let one = Limbs::<1>([1]); + assert_eq!(format!("{one}"), "1"); + + let big = Limbs::<2>([0, 1]); + assert_eq!(format!("{big}"), "10000000000000000"); + } +} diff --git a/crates/jolt-field/src/montgomery_constants.rs b/crates/jolt-field/src/montgomery_constants.rs new file mode 100644 index 000000000..818d43072 --- /dev/null +++ b/crates/jolt-field/src/montgomery_constants.rs @@ -0,0 +1,37 @@ +/// Montgomery field 
constants +/// +/// Provides the constants needed to generate field-arithmetic shaders for any +/// Montgomery-form prime field. Values are in little-endian u32 limbs matching +/// the shader representation. +/// +/// # Safety invariants +/// +/// Implementations must guarantee the CIOS unreduced chaining property: +/// `4 * r^2 / R < 2r` where `R = 2^(32 * NUM_U32_LIMBS)`. This ensures that +/// intermediate products from `fr_mul_unreduced` remain in `[0, 2r)` and can +/// be safely fed into the next CIOS multiplication without explicit reduction. +pub trait MontgomeryConstants: 'static { + /// Number of 32-bit limbs in the Montgomery representation. + /// 4 for 128-bit fields, 8 for BN254 (256-bit). + const NUM_U32_LIMBS: usize; + + /// Number of 32-bit limbs in the wide accumulator: `2 * NUM_U32_LIMBS + 2`. + /// Provides headroom for accumulating ~2^32 unreduced products. + const ACC_U32_LIMBS: usize; + + /// Byte size of a single field element: `NUM_U32_LIMBS * 4`. + const FIELD_BYTE_SIZE: usize; + + /// The field modulus `r` as little-endian u32 limbs. + fn modulus_u32() -> &'static [u32]; + + /// `-r^{-1} mod 2^{32}` — the Montgomery reduction constant. + fn inv32() -> u32; + + /// `R^2 mod r` as little-endian u32 limbs, where `R = 2^(32 * NUM_U32_LIMBS)`. + /// Used for converting standard-form integers to Montgomery form. + fn r2_u32() -> &'static [u32]; + + /// `R mod r` as little-endian u32 limbs — the Montgomery representation of 1. + fn one_u32() -> &'static [u32]; +} diff --git a/crates/jolt-field/src/signed/mod.rs b/crates/jolt-field/src/signed/mod.rs new file mode 100644 index 000000000..9c50077b2 --- /dev/null +++ b/crates/jolt-field/src/signed/mod.rs @@ -0,0 +1,73 @@ +//! Signed big integer types for the Jolt prover. +//! +//! These types represent signed integers with configurable bit widths using +//! sign-magnitude representation. +//! +//! Two families are provided: +//! +//! 
- [`SignedBigInt`]: magnitude stored as `Limbs` (width = `N * 64` bits) +//! - [`SignedBigIntHi32`]: magnitude stored as `[u64; N]` + `u32` tail (width = `N * 64 + 32` bits) +//! +//! Common type aliases: +//! - `S64`, `S128`, `S192`, `S256` (from `SignedBigInt`) +//! - `S96`, `S160`, `S224` (from `SignedBigIntHi32`) + +mod signed_bigint; +mod signed_bigint_hi32; + +pub use signed_bigint::*; +pub use signed_bigint_hi32::*; + +/// Generates the 5 standard operator impls for each `(Op, OpAssign)` pair: +/// val-val, OpAssign-val, val-ref, OpAssign-ref, ref-ref. +/// +/// Each operator delegates to an `&self`-taking `_assign_in_place` method. +macro_rules! impl_signed_assign_ops { + ($T:ident { + $($Op:ident, $OpAssign:ident, $method:ident, $assign_method:ident => $assign_fn:ident;)* + }) => { $( + impl $Op for $T { + type Output = Self; + #[inline] + fn $method(mut self, rhs: Self) -> Self { + self.$assign_fn(&rhs); + self + } + } + + impl $OpAssign for $T { + #[inline] + fn $assign_method(&mut self, rhs: Self) { + self.$assign_fn(&rhs); + } + } + + impl $Op<&$T> for $T { + type Output = $T; + #[inline] + fn $method(mut self, rhs: &$T) -> $T { + self.$assign_fn(rhs); + self + } + } + + impl $OpAssign<&$T> for $T { + #[inline] + fn $assign_method(&mut self, rhs: &$T) { + self.$assign_fn(rhs); + } + } + + impl $Op for &$T { + type Output = $T; + #[inline] + fn $method(self, rhs: Self) -> $T { + let mut out = *self; + out.$assign_fn(rhs); + out + } + } + )* }; +} + +pub(crate) use impl_signed_assign_ops; diff --git a/crates/jolt-field/src/signed/signed_bigint.rs b/crates/jolt-field/src/signed/signed_bigint.rs new file mode 100644 index 000000000..88b794ba8 --- /dev/null +++ b/crates/jolt-field/src/signed/signed_bigint.rs @@ -0,0 +1,740 @@ +//! Sign-magnitude big integer with `N * 64`-bit width. 
+ +use ark_serialize::{ + CanonicalDeserialize, CanonicalSerialize, Compress, Read, SerializationError, Valid, Validate, + Write, +}; +use core::cmp::Ordering; +use core::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign}; +use num_traits::Zero; + +use crate::Limbs; + +/// A signed big integer using `Limbs` for magnitude and a sign bit. +/// +/// Zero is not canonicalized: a zero magnitude can be paired with either sign. +/// Structural equality distinguishes `+0` and `-0`, but ordering treats them +/// as equal. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct SignedBigInt { + pub magnitude: Limbs, + pub is_positive: bool, +} + +#[cfg(feature = "allocative")] +impl allocative::Allocative for SignedBigInt { + fn visit<'a, 'b: 'a>(&self, visitor: &'a mut allocative::Visitor<'b>) { + visitor.visit_simple_sized::(); + } +} + +impl Default for SignedBigInt { + #[inline] + fn default() -> Self { + Self::zero() + } +} + +impl Zero for SignedBigInt { + #[inline] + fn zero() -> Self { + Self::zero() + } + + #[inline] + fn is_zero(&self) -> bool { + self.magnitude.is_zero() + } +} + +pub type S64 = SignedBigInt<1>; +pub type S128 = SignedBigInt<2>; +pub type S192 = SignedBigInt<3>; +pub type S256 = SignedBigInt<4>; + +impl SignedBigInt { + #[inline] + fn cmp_magnitude_mixed(&self, rhs: &SignedBigInt) -> Ordering { + let max_limbs = if N > M { N } else { M }; + let mut i = max_limbs; + while i > 0 { + let idx = i - 1; + let a = if idx < N { self.magnitude.0[idx] } else { 0u64 }; + let b = if idx < M { rhs.magnitude.0[idx] } else { 0u64 }; + if a > b { + return Ordering::Greater; + } + if a < b { + return Ordering::Less; + } + i -= 1; + } + Ordering::Equal + } + + #[inline] + pub fn new(limbs: [u64; N], is_positive: bool) -> Self { + Self { + magnitude: Limbs::new(limbs), + is_positive, + } + } + + #[inline] + pub fn from_limbs(magnitude: Limbs, is_positive: bool) -> Self { + Self { + magnitude, + is_positive, + } + } + + #[inline] + pub fn zero() -> Self { + 
Self { + magnitude: Limbs::from_u64(0), + is_positive: true, + } + } + + #[inline] + pub fn one() -> Self { + Self { + magnitude: Limbs::from_u64(1), + is_positive: true, + } + } + + #[inline] + pub fn as_magnitude(&self) -> &Limbs { + &self.magnitude + } + + #[inline] + pub fn magnitude_limbs(&self) -> [u64; N] { + self.magnitude.0 + } + + #[inline] + pub fn magnitude_slice(&self) -> &[u64] { + self.magnitude.as_ref() + } + + #[inline] + pub fn sign(&self) -> bool { + self.is_positive + } + + #[inline] + pub fn negate(self) -> Self { + Self::from_limbs(self.magnitude, !self.is_positive) + } + + #[inline(always)] + fn add_assign_in_place(&mut self, rhs: &Self) { + if self.is_positive == rhs.is_positive { + let _carry = self.magnitude.add_with_carry(&rhs.magnitude); + } else { + match self.magnitude.cmp(&rhs.magnitude) { + Ordering::Greater | Ordering::Equal => { + let _borrow = self.magnitude.sub_with_borrow(&rhs.magnitude); + } + Ordering::Less => { + let old = core::mem::replace(&mut self.magnitude, rhs.magnitude); + let _borrow = self.magnitude.sub_with_borrow(&old); + self.is_positive = rhs.is_positive; + } + } + } + } + + #[inline(always)] + fn sub_assign_in_place(&mut self, rhs: &Self) { + if self.is_positive != rhs.is_positive { + let _carry = self.magnitude.add_with_carry(&rhs.magnitude); + } else { + match self.magnitude.cmp(&rhs.magnitude) { + Ordering::Greater | Ordering::Equal => { + let _borrow = self.magnitude.sub_with_borrow(&rhs.magnitude); + } + Ordering::Less => { + let old = core::mem::replace(&mut self.magnitude, rhs.magnitude); + let _borrow = self.magnitude.sub_with_borrow(&old); + self.is_positive = !self.is_positive; + } + } + } + } + + #[inline(always)] + fn mul_assign_in_place(&mut self, rhs: &Self) { + let low = self.magnitude.mul_low(&rhs.magnitude); + self.magnitude = low; + self.is_positive = self.is_positive == rhs.is_positive; + } + + #[inline] + pub fn zero_extend_from(smaller: &SignedBigInt) -> SignedBigInt { + debug_assert!( + M 
<= N, + "cannot zero-extend: source has more limbs than destination" + ); + let widened_mag = Limbs::::zero_extend_from::(&smaller.magnitude); + SignedBigInt::from_limbs(widened_mag, smaller.is_positive) + } +} + +impl SignedBigInt { + /// Adds two values and truncates the result to `M` limbs. + #[inline] + pub fn add_trunc(&self, rhs: &SignedBigInt) -> SignedBigInt { + if self.is_positive == rhs.is_positive { + let mag = self.magnitude.add_trunc::(&rhs.magnitude); + return SignedBigInt:: { + magnitude: mag, + is_positive: self.is_positive, + }; + } + match self.magnitude.cmp(&rhs.magnitude) { + Ordering::Greater | Ordering::Equal => { + let mag = self.magnitude.sub_trunc::(&rhs.magnitude); + SignedBigInt:: { + magnitude: mag, + is_positive: self.is_positive, + } + } + Ordering::Less => { + let mag = rhs.magnitude.sub_trunc::(&self.magnitude); + SignedBigInt:: { + magnitude: mag, + is_positive: rhs.is_positive, + } + } + } + } + + /// Subtracts and truncates the result to `M` limbs. + #[inline] + pub fn sub_trunc(&self, rhs: &SignedBigInt) -> SignedBigInt { + if self.is_positive != rhs.is_positive { + let mag = self.magnitude.add_trunc::(&rhs.magnitude); + return SignedBigInt:: { + magnitude: mag, + is_positive: self.is_positive, + }; + } + match self.magnitude.cmp(&rhs.magnitude) { + Ordering::Greater | Ordering::Equal => { + let mag = self.magnitude.sub_trunc::(&rhs.magnitude); + SignedBigInt:: { + magnitude: mag, + is_positive: self.is_positive, + } + } + Ordering::Less => { + let mag = rhs.magnitude.sub_trunc::(&self.magnitude); + SignedBigInt:: { + magnitude: mag, + is_positive: !self.is_positive, + } + } + } + } + + /// Adds values of different widths (`N` and `M` limbs) and truncates to `P` limbs. + #[inline] + pub fn add_trunc_mixed( + &self, + rhs: &SignedBigInt, + ) -> SignedBigInt
<P>
{ + if self.is_positive == rhs.is_positive { + let mag = self.magnitude.add_trunc::(&rhs.magnitude); + return SignedBigInt::
<P>
{ + magnitude: mag, + is_positive: self.is_positive, + }; + } + match self.cmp_magnitude_mixed(rhs) { + Ordering::Greater | Ordering::Equal => { + let mag = self.magnitude.sub_trunc::(&rhs.magnitude); + SignedBigInt::
<P>
{ + magnitude: mag, + is_positive: self.is_positive, + } + } + Ordering::Less => { + let mag = rhs.magnitude.sub_trunc::(&self.magnitude); + SignedBigInt::
<P>
{ + magnitude: mag, + is_positive: rhs.is_positive, + } + } + } + } + + /// Subtracts values of different widths and truncates to `P` limbs. + #[inline] + pub fn sub_trunc_mixed( + &self, + rhs: &SignedBigInt, + ) -> SignedBigInt
<P>
{ + if self.is_positive != rhs.is_positive { + let mag = self.magnitude.add_trunc::(&rhs.magnitude); + return SignedBigInt::
<P>
{ + magnitude: mag, + is_positive: self.is_positive, + }; + } + match self.cmp_magnitude_mixed(rhs) { + Ordering::Greater | Ordering::Equal => { + let mag = self.magnitude.sub_trunc::(&rhs.magnitude); + SignedBigInt::
<P>
{ + magnitude: mag, + is_positive: self.is_positive, + } + } + Ordering::Less => { + let mag = rhs.magnitude.sub_trunc::(&self.magnitude); + SignedBigInt::
<P>
{ + magnitude: mag, + is_positive: !self.is_positive, + } + } + } + } + + /// Multiplies and truncates the result to `P` limbs. + #[inline] + pub fn mul_trunc( + &self, + rhs: &SignedBigInt, + ) -> SignedBigInt
<P>
{ + let mag = self.magnitude.mul_trunc::(&rhs.magnitude); + let sign = self.is_positive == rhs.is_positive; + SignedBigInt::
<P>
{ + magnitude: mag, + is_positive: sign, + } + } + + /// Fused multiply-add: `acc += self * rhs`, truncated to `P` limbs. + #[inline] + pub fn fmadd_trunc( + &self, + rhs: &SignedBigInt, + acc: &mut SignedBigInt
<P>
, + ) { + let prod_mag = self.magnitude.mul_trunc::(&rhs.magnitude); + let prod_sign = self.is_positive == rhs.is_positive; + if acc.is_positive == prod_sign { + let _ = acc.magnitude.add_with_carry(&prod_mag); + } else { + match acc.magnitude.cmp(&prod_mag) { + Ordering::Greater | Ordering::Equal => { + let _ = acc.magnitude.sub_with_borrow(&prod_mag); + } + Ordering::Less => { + let old = core::mem::replace(&mut acc.magnitude, prod_mag); + let _ = acc.magnitude.sub_with_borrow(&old); + acc.is_positive = prod_sign; + } + } + } + } +} + +impl SignedBigInt { + #[inline] + pub fn from_u64(value: u64) -> Self { + Self::from_limbs(Limbs::from_u64(value), true) + } + + #[inline] + pub fn from_u64_with_sign(value: u64, is_positive: bool) -> Self { + Self::from_limbs(Limbs::from_u64(value), is_positive) + } + + #[inline] + pub fn from_i64(value: i64) -> Self { + if value >= 0 { + Self::from_limbs(Limbs::from_u64(value as u64), true) + } else { + Self::from_limbs(Limbs::from_u64(value.wrapping_neg() as u64), false) + } + } + + #[inline] + pub fn from_u128(value: u128) -> Self { + debug_assert!(N >= 2, "from_u128 requires at least 2 limbs"); + let mut limbs = [0u64; N]; + limbs[0] = value as u64; + limbs[1] = (value >> 64) as u64; + Self::from_limbs(Limbs::new(limbs), true) + } + + #[inline] + pub fn from_i128(value: i128) -> Self { + debug_assert!(N >= 2, "from_i128 requires at least 2 limbs"); + if value >= 0 { + let mut limbs = [0u64; N]; + let v = value as u128; + limbs[0] = v as u64; + limbs[1] = (v >> 64) as u64; + Self::from_limbs(Limbs::new(limbs), true) + } else { + let mag = value.unsigned_abs(); + let mut limbs = [0u64; N]; + limbs[0] = mag as u64; + limbs[1] = (mag >> 64) as u64; + Self::from_limbs(Limbs::new(limbs), false) + } + } +} + +impl From for SignedBigInt { + #[inline] + fn from(value: u64) -> Self { + Self::from_u64(value) + } +} + +impl From for SignedBigInt { + #[inline] + fn from(value: i64) -> Self { + Self::from_i64(value) + } +} + +impl 
From<(u64, bool)> for SignedBigInt { + #[inline] + fn from(value_and_sign: (u64, bool)) -> Self { + Self::from_u64_with_sign(value_and_sign.0, value_and_sign.1) + } +} + +impl From for SignedBigInt { + #[inline] + fn from(value: u128) -> Self { + debug_assert!(N >= 2, "From requires at least 2 limbs"); + Self::from_u128(value) + } +} + +impl From for SignedBigInt { + #[inline] + fn from(value: i128) -> Self { + debug_assert!(N >= 2, "From requires at least 2 limbs"); + Self::from_i128(value) + } +} + +impl S64 { + #[inline] + pub fn to_i128(&self) -> i128 { + let magnitude = self.magnitude.0[0]; + if self.is_positive { + magnitude as i128 + } else { + -(magnitude as i128) + } + } + + #[inline] + pub fn magnitude_as_u64(&self) -> u64 { + self.magnitude.0[0] + } + + #[inline(always)] + pub fn from_diff_u64s(a: u64, b: u64) -> Self { + if a < b { + Self::new([b - a], false) + } else { + Self::new([a - b], true) + } + } +} + +impl S128 { + #[inline] + pub fn to_i128(&self) -> Option { + let hi = self.magnitude.0[1]; + let lo = self.magnitude.0[0]; + let hi_top_bit = hi >> 63; + if self.is_positive { + if hi_top_bit != 0 { + return None; + } + let mag = ((hi as u128) << 64) | (lo as u128); + Some(mag as i128) + } else if hi_top_bit == 0 { + let mag = ((hi as u128) << 64) | (lo as u128); + Some(-(mag as i128)) + } else if hi == (1u64 << 63) && lo == 0 { + Some(i128::MIN) + } else { + None + } + } + + #[inline] + pub fn magnitude_as_u128(&self) -> u128 { + (self.magnitude.0[1] as u128) << 64 | (self.magnitude.0[0] as u128) + } + + #[inline] + pub fn from_u128_and_sign(value: u128, is_positive: bool) -> Self { + Self::new([value as u64, (value >> 64) as u64], is_positive) + } + + #[inline] + pub fn from_u64_mul_i64(u: u64, s: i64) -> Self { + let mag = (u as u128) * (s.unsigned_abs() as u128); + Self::from_u128_and_sign(mag, s >= 0) + } + + #[inline] + pub fn from_i64_mul_u64(s: i64, u: u64) -> Self { + Self::from_u64_mul_i64(u, s) + } + + #[inline] + pub fn 
from_u64_mul_u64(a: u64, b: u64) -> Self { + let mag = (a as u128) * (b as u128); + Self::from_u128_and_sign(mag, true) + } + + #[inline] + pub fn from_i64_mul_i64(a: i64, b: i64) -> Self { + let mag = (a.unsigned_abs() as u128) * (b.unsigned_abs() as u128); + let is_positive = (a >= 0) == (b >= 0); + Self::from_u128_and_sign(mag, is_positive) + } +} + +super::impl_signed_assign_ops!(SignedBigInt { + Add, AddAssign, add, add_assign => add_assign_in_place; + Sub, SubAssign, sub, sub_assign => sub_assign_in_place; + Mul, MulAssign, mul, mul_assign => mul_assign_in_place; +}); + +impl Neg for SignedBigInt { + type Output = Self; + #[inline] + fn neg(self) -> Self::Output { + self.negate() + } +} + +impl PartialOrd for SignedBigInt { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for SignedBigInt { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + if self.magnitude.is_zero() && other.magnitude.is_zero() { + return Ordering::Equal; + } + match (self.is_positive, other.is_positive) { + (true, false) => Ordering::Greater, + (false, true) => Ordering::Less, + _ => { + let ord = self.magnitude.cmp(&other.magnitude); + if self.is_positive { + ord + } else { + ord.reverse() + } + } + } + } +} + +impl CanonicalSerialize for SignedBigInt { + #[inline] + fn serialize_with_mode( + &self, + mut w: W, + compress: Compress, + ) -> Result<(), SerializationError> { + (self.is_positive as u8).serialize_with_mode(&mut w, compress)?; + self.magnitude.serialize_with_mode(w, compress) + } + + #[inline] + fn serialized_size(&self, compress: Compress) -> usize { + (self.is_positive as u8).serialized_size(compress) + + self.magnitude.serialized_size(compress) + } +} + +impl CanonicalDeserialize for SignedBigInt { + #[inline] + fn deserialize_with_mode( + mut r: R, + compress: Compress, + validate: Validate, + ) -> Result { + let sign_u8 = u8::deserialize_with_mode(&mut r, compress, validate)?; + let mag = 
Limbs::::deserialize_with_mode(r, compress, validate)?; + Ok(SignedBigInt { + magnitude: mag, + is_positive: sign_u8 != 0, + }) + } +} + +impl Valid for SignedBigInt { + #[inline] + fn check(&self) -> Result<(), SerializationError> { + self.magnitude.check() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn s64_basic_arithmetic() { + let a = S64::from_i64(10); + let b = S64::from_i64(-3); + let c = a + b; + assert_eq!(c.to_i128(), 7); + + let d = a - b; + assert_eq!(d.to_i128(), 13); + } + + #[test] + fn s64_from_diff() { + let d = S64::from_diff_u64s(5, 10); + assert!(!d.is_positive); + assert_eq!(d.magnitude_as_u64(), 5); + + let d2 = S64::from_diff_u64s(10, 5); + assert!(d2.is_positive); + assert_eq!(d2.magnitude_as_u64(), 5); + } + + #[test] + fn s128_mul_i64() { + let r = S128::from_i64_mul_i64(-3, 7); + assert_eq!(r.to_i128(), Some(-21)); + + let r2 = S128::from_u64_mul_u64(100, 200); + assert_eq!(r2.to_i128(), Some(20000)); + } + + #[test] + fn s128_magnitude() { + let v = S128::from_i128(-12_345_678_901_234_567_890_i128); + assert!(!v.is_positive); + assert_eq!(v.magnitude_as_u128(), 12_345_678_901_234_567_890_u128); + } + + #[test] + fn mul_trunc_s64_to_s128() { + let a = S64::from_i64(-5); + let b = S64::from_i64(7); + let c: S128 = a.mul_trunc::<1, 2>(&b); + assert_eq!(c.to_i128(), Some(-35)); + } + + #[test] + fn ordering() { + let a = S64::from_i64(5); + let b = S64::from_i64(-5); + let z1 = S64::from_u64(0); + let z2 = S64::new([0], false); // negative zero + assert!(a > b); + assert_eq!(z1.cmp(&z2), Ordering::Equal); + } + + #[test] + fn zero_extend() { + let s = S64::from_i64(-42); + let wide: S128 = SignedBigInt::zero_extend_from(&s); + assert!(!wide.is_positive); + assert_eq!(wide.magnitude.0[0], 42); + assert_eq!(wide.magnitude.0[1], 0); + } + + #[test] + fn add_trunc_mixed() { + let a = S64::from_i64(100); + let b = S128::from_i128(200); + let c: S128 = a.add_trunc_mixed::<2, 2>(&b); + assert_eq!(c.to_i128(), Some(300)); + } + 
+ #[test] + fn fmadd_trunc() { + let a = S64::from_i64(3); + let b = S64::from_i64(4); + let mut acc = S128::from_i128(10); + a.fmadd_trunc::<1, 2>(&b, &mut acc); + assert_eq!(acc.to_i128(), Some(22)); // 10 + 3*4 + } + + #[test] + fn serialization_roundtrip() { + use ark_serialize::{CanonicalDeserialize, CanonicalSerialize}; + + let val = S128::from_i128(-999_999); + let mut bytes = Vec::new(); + val.serialize_compressed(&mut bytes).unwrap(); + let restored = S128::deserialize_compressed(&bytes[..]).unwrap(); + assert_eq!(val, restored); + } + + #[test] + fn s64_serialization_roundtrip() { + use ark_serialize::{CanonicalDeserialize, CanonicalSerialize}; + + for &v in &[0i64, 1, -1, i64::MAX, i64::MIN] { + let val = S64::from_i64(v); + let mut bytes = Vec::new(); + val.serialize_compressed(&mut bytes).unwrap(); + let restored = S64::deserialize_compressed(&bytes[..]).unwrap(); + assert_eq!(val, restored); + } + } + + #[test] + fn s128_to_i128_out_of_range() { + // Magnitude exceeding i128::MAX for positive + let big_positive = S128::new([0, 1u64 << 63], true); + assert_eq!(big_positive.to_i128(), None); + + // Magnitude exceeding i128::MIN for negative (not exactly MIN) + let big_negative = S128::new([1, 1u64 << 63], false); + assert_eq!(big_negative.to_i128(), None); + + // Exactly i128::MIN is representable + let min_val = S128::new([0, 1u64 << 63], false); + assert_eq!(min_val.to_i128(), Some(i128::MIN)); + } + + #[test] + fn fmadd_trunc_sign_flip() { + // Positive accumulator, subtract larger product → sign flips + let a = S64::from_i64(-10); + let b = S64::from_i64(5); + let mut acc = S128::from_i128(3); + a.fmadd_trunc::<1, 2>(&b, &mut acc); + // 3 + (-10 * 5) = 3 - 50 = -47 + assert_eq!(acc.to_i128(), Some(-47)); + assert!(!acc.is_positive); + } + + #[test] + fn s64_from_diff_u64s_zero_zero() { + let d = S64::from_diff_u64s(0, 0); + assert!(d.is_positive); + assert!(d.is_zero()); + assert_eq!(d.magnitude_as_u64(), 0); + } +} diff --git 
a/crates/jolt-field/src/signed/signed_bigint_hi32.rs b/crates/jolt-field/src/signed/signed_bigint_hi32.rs new file mode 100644 index 000000000..6c8553913 --- /dev/null +++ b/crates/jolt-field/src/signed/signed_bigint_hi32.rs @@ -0,0 +1,641 @@ +//! Sign-magnitude big integer with `N * 64 + 32`-bit width. + +#[cfg(feature = "allocative")] +use allocative::Allocative; + +use ark_serialize::{ + CanonicalDeserialize, CanonicalSerialize, Compress, Read, SerializationError, Valid, Validate, + Write, +}; +use core::cmp::Ordering; +use core::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign}; + +use super::{SignedBigInt, S128, S64}; +use crate::Limbs; + +/// Compact signed big-integer with width `N * 64 + 32` bits. +/// +/// Uses `[u64; N]` for the low limbs and a `u32` for the high tail. +/// This representation saves 4 bytes per value compared to using `N + 1` +/// full 64-bit limbs, which matters when millions of these are stored +/// in witness polynomials. +/// +/// Zero is not normalized: a zero magnitude can have either sign. +/// Structural equality distinguishes `+0` and `-0`, but ordering treats +/// them as equal. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "allocative", derive(Allocative))] +pub struct SignedBigIntHi32 { + magnitude_lo: [u64; N], + magnitude_hi: u32, + is_positive: bool, +} + +pub type S96 = SignedBigIntHi32<1>; +pub type S160 = SignedBigIntHi32<2>; +pub type S224 = SignedBigIntHi32<3>; + +impl SignedBigIntHi32 { + pub const fn new(magnitude_lo: [u64; N], magnitude_hi: u32, is_positive: bool) -> Self { + Self { + magnitude_lo, + magnitude_hi, + is_positive, + } + } + + pub const fn zero() -> Self { + Self { + magnitude_lo: [0; N], + magnitude_hi: 0, + is_positive: true, + } + } + + pub fn one() -> Self { + let mut magnitude_lo = [0; N]; + let magnitude_hi; + if N == 0 { + magnitude_hi = 1; + } else { + magnitude_lo[0] = 1; + magnitude_hi = 0; + } + Self { + magnitude_lo, + magnitude_hi, + is_positive: true, + } + } + + pub const fn magnitude_lo(&self) -> &[u64; N] { + &self.magnitude_lo + } + + pub const fn magnitude_hi(&self) -> u32 { + self.magnitude_hi + } + + pub const fn is_positive(&self) -> bool { + self.is_positive + } + + pub const fn is_zero(&self) -> bool { + let mut lo_is_zero = true; + let mut i = 0; + while i < N { + if self.magnitude_lo[i] != 0 { + lo_is_zero = false; + break; + } + i += 1; + } + self.magnitude_hi == 0 && lo_is_zero + } + + fn compare_magnitudes(&self, other: &Self) -> Ordering { + if self.magnitude_hi != other.magnitude_hi { + return self.magnitude_hi.cmp(&other.magnitude_hi); + } + for i in (0..N).rev() { + if self.magnitude_lo[i] != other.magnitude_lo[i] { + return self.magnitude_lo[i].cmp(&other.magnitude_lo[i]); + } + } + Ordering::Equal + } + + #[inline(always)] + fn add_assign_in_place(&mut self, rhs: &Self) { + if self.is_positive == rhs.is_positive { + let (lo, hi, _carry) = self.add_magnitudes_with_carry(rhs); + self.magnitude_lo = lo; + self.magnitude_hi = hi; + } else { + match self.compare_magnitudes(rhs) { + Ordering::Greater | Ordering::Equal => { + let (lo, hi, _borrow) = 
self.sub_magnitudes_with_borrow(rhs); + self.magnitude_lo = lo; + self.magnitude_hi = hi; + } + Ordering::Less => { + let (lo, hi, _borrow) = rhs.sub_magnitudes_with_borrow(self); + self.magnitude_lo = lo; + self.magnitude_hi = hi; + self.is_positive = rhs.is_positive; + } + } + } + } + + #[inline(always)] + fn sub_assign_in_place(&mut self, rhs: &Self) { + let neg_rhs = -*rhs; + self.add_assign_in_place(&neg_rhs); + } + + #[inline(always)] + fn mul_assign_in_place(&mut self, rhs: &Self) { + let (lo, hi) = self.mul_magnitudes(rhs); + self.is_positive = self.is_positive == rhs.is_positive; + self.magnitude_lo = lo; + self.magnitude_hi = hi; + } + + fn mul_magnitudes(&self, other: &Self) -> ([u64; N], u32) { + if N == 0 { + let a2 = self.magnitude_hi as u64; + let b2 = other.magnitude_hi as u64; + let prod = a2.wrapping_mul(b2); + let hi = (prod & 0xFFFF_FFFF) as u32; + let lo: [u64; N] = [0u64; N]; + return (lo, hi); + } + + if N == 1 { + let a0 = self.magnitude_lo[0]; + let a1 = self.magnitude_hi as u64; + let b0 = other.magnitude_lo[0]; + let b1 = other.magnitude_hi as u64; + + let t0 = (a0 as u128) * (b0 as u128); + let lo0 = t0 as u64; + let cross = (t0 >> 64) + (a0 as u128) * (b1 as u128) + (a1 as u128) * (b0 as u128); + let hi = (cross as u64 & 0xFFFF_FFFF) as u32; + let mut lo = [0u64; N]; + lo[0] = lo0; + return (lo, hi); + } + + if N == 2 { + let a0 = self.magnitude_lo[0]; + let a1 = self.magnitude_lo[1]; + let a2 = self.magnitude_hi as u64; + let b0 = other.magnitude_lo[0]; + let b1 = other.magnitude_lo[1]; + let b2 = other.magnitude_hi as u64; + + let t0 = (a0 as u128) * (b0 as u128); + let r0 = t0 as u64; + let carry0 = t0 >> 64; + + let sum1 = carry0 + (a0 as u128) * (b1 as u128) + (a1 as u128) * (b0 as u128); + let r1 = sum1 as u64; + let carry1 = sum1 >> 64; + + let sum2 = carry1 + + (a0 as u128) * (b2 as u128) + + (a1 as u128) * (b1 as u128) + + (a2 as u128) * (b0 as u128); + let r2 = sum2 as u64; + + let hi = (r2 & 0xFFFF_FFFF) as u32; + let mut lo 
= [0u64; N]; + lo[0] = r0; + lo[1] = r1; + return (lo, hi); + } + + // General path — reads limbs inline to avoid heap allocation. + // Stack buffer covers up to N=7 (2*(7+1) = 16 entries). + let num_limbs = N + 1; + let mut prod = [0u64; 16]; + debug_assert!( + 2 * num_limbs <= prod.len(), + "N too large for stack-allocated product buffer" + ); + + let limb_a = |i: usize| -> u64 { + if i < N { + self.magnitude_lo[i] + } else { + self.magnitude_hi as u64 + } + }; + let limb_b = |j: usize| -> u64 { + if j < N { + other.magnitude_lo[j] + } else { + other.magnitude_hi as u64 + } + }; + + for i in 0..num_limbs { + let a_limb = limb_a(i); + let mut carry: u128 = 0; + for j in 0..num_limbs { + let idx = i + j; + let p = (a_limb as u128) * (limb_b(j) as u128) + (prod[idx] as u128) + carry; + prod[idx] = p as u64; + carry = p >> 64; + } + if carry > 0 { + let spill = i + num_limbs; + if spill < prod.len() { + prod[spill] = prod[spill].wrapping_add(carry as u64); + } + } + } + + let mut magnitude_lo = [0u64; N]; + magnitude_lo[..N].copy_from_slice(&prod[..N]); + let magnitude_hi = (prod[N] & 0xFFFF_FFFF) as u32; + (magnitude_lo, magnitude_hi) + } + + fn add_magnitudes_with_carry(&self, other: &Self) -> ([u64; N], u32, bool) { + let mut magnitude_lo = [0; N]; + let mut carry: u128 = 0; + for (i, out) in magnitude_lo.iter_mut().enumerate() { + let sum = (self.magnitude_lo[i] as u128) + (other.magnitude_lo[i] as u128) + carry; + *out = sum as u64; + carry = sum >> 64; + } + let sum_hi = (self.magnitude_hi as u128) + (other.magnitude_hi as u128) + carry; + let magnitude_hi = sum_hi as u32; + let final_carry = (sum_hi >> 32) != 0; + (magnitude_lo, magnitude_hi, final_carry) + } + + fn sub_magnitudes_with_borrow(&self, other: &Self) -> ([u64; N], u32, bool) { + let mut magnitude_lo = [0u64; N]; + let mut borrow = false; + for (i, out) in magnitude_lo.iter_mut().enumerate() { + let (d1, b1) = self.magnitude_lo[i].overflowing_sub(other.magnitude_lo[i]); + let (d2, b2) = 
d1.overflowing_sub(u64::from(borrow)); + *out = d2; + borrow = b1 || b2; + } + let (hi1, b1) = self.magnitude_hi.overflowing_sub(other.magnitude_hi); + let (hi2, b2) = hi1.overflowing_sub(u32::from(borrow)); + let final_borrow = b1 || b2; + (magnitude_lo, hi2, final_borrow) + } + + /// Return the unsigned magnitude as `Limbs`. + /// Debug-asserts `NPLUS1 == N + 1`. + #[inline] + pub fn magnitude_as_limbs_nplus1(&self) -> Limbs { + debug_assert!( + NPLUS1 == N + 1, + "NPLUS1 must be N+1 for SignedBigIntHi32 magnitude pack" + ); + let mut limbs = [0u64; NPLUS1]; + if N > 0 { + limbs[..N].copy_from_slice(&self.magnitude_lo); + } + limbs[N] = self.magnitude_hi as u64; + Limbs::new(limbs) + } + + #[inline] + pub fn zero_extend_from(smaller: &SignedBigIntHi32) -> SignedBigIntHi32 { + debug_assert!( + M <= N, + "cannot zero-extend: source has more limbs than destination" + ); + if N == M { + let mut lo = [0u64; N]; + if N > 0 { + lo.copy_from_slice(smaller.magnitude_lo()); + } + return SignedBigIntHi32::::new(lo, smaller.magnitude_hi(), smaller.is_positive()); + } + // N > M: place hi32 into limb M + let mut lo = [0u64; N]; + if M > 0 { + lo[..M].copy_from_slice(smaller.magnitude_lo()); + } + lo[M] = smaller.magnitude_hi() as u64; + SignedBigIntHi32::::new(lo, 0u32, smaller.is_positive()) + } + + /// Convert into a `SignedBigInt`. + /// Debug-asserts `NPLUS1 == N + 1`. 
+ #[inline] + pub fn to_signed_bigint_nplus1(&self) -> SignedBigInt { + debug_assert!( + NPLUS1 == N + 1, + "to_signed_bigint_nplus1 requires NPLUS1 = N + 1" + ); + let mut limbs = [0u64; NPLUS1]; + if N > 0 { + limbs[..N].copy_from_slice(self.magnitude_lo()); + } + limbs[N] = self.magnitude_hi() as u64; + SignedBigInt::from_limbs(Limbs::new(limbs), self.is_positive()) + } +} + +impl Neg for SignedBigIntHi32 { + type Output = Self; + fn neg(self) -> Self::Output { + Self::new(self.magnitude_lo, self.magnitude_hi, !self.is_positive) + } +} + +impl Neg for &SignedBigIntHi32 { + type Output = SignedBigIntHi32; + fn neg(self) -> Self::Output { + SignedBigIntHi32::new(self.magnitude_lo, self.magnitude_hi, !self.is_positive) + } +} + +super::impl_signed_assign_ops!(SignedBigIntHi32 { + Add, AddAssign, add, add_assign => add_assign_in_place; + Sub, SubAssign, sub, sub_assign => sub_assign_in_place; + Mul, MulAssign, mul, mul_assign => mul_assign_in_place; +}); + +impl PartialOrd for SignedBigIntHi32 { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for SignedBigIntHi32 { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + if self.is_zero() && other.is_zero() { + return Ordering::Equal; + } + match (self.is_positive, other.is_positive) { + (true, false) => Ordering::Greater, + (false, true) => Ordering::Less, + _ => { + let ord = self.compare_magnitudes(other); + if self.is_positive { + ord + } else { + ord.reverse() + } + } + } + } +} + +impl CanonicalSerialize for SignedBigIntHi32 { + #[inline] + fn serialize_with_mode( + &self, + mut w: W, + compress: Compress, + ) -> Result<(), SerializationError> { + (self.is_positive as u8).serialize_with_mode(&mut w, compress)?; + self.magnitude_hi.serialize_with_mode(&mut w, compress)?; + for i in 0..N { + self.magnitude_lo[i].serialize_with_mode(&mut w, compress)?; + } + Ok(()) + } + + #[inline] + fn serialized_size(&self, compress: Compress) -> usize { + 
(self.is_positive as u8).serialized_size(compress) + + self.magnitude_hi.serialized_size(compress) + + (0u64).serialized_size(compress) * N + } +} + +impl CanonicalDeserialize for SignedBigIntHi32 { + #[inline] + fn deserialize_with_mode( + mut r: R, + compress: Compress, + validate: Validate, + ) -> Result { + let sign_u8 = u8::deserialize_with_mode(&mut r, compress, validate)?; + let hi = u32::deserialize_with_mode(&mut r, compress, validate)?; + let mut lo = [0u64; N]; + for limb in &mut lo { + *limb = u64::deserialize_with_mode(&mut r, compress, validate)?; + } + Ok(SignedBigIntHi32::new(lo, hi, sign_u8 != 0)) + } +} + +impl Valid for SignedBigIntHi32 { + #[inline] + fn check(&self) -> Result<(), SerializationError> { + Ok(()) + } +} + +impl From for S96 { + #[inline] + fn from(val: i64) -> Self { + Self::new([val.unsigned_abs()], 0, val.is_positive()) + } +} + +impl From for S96 { + #[inline] + fn from(val: u64) -> Self { + Self::new([val], 0, true) + } +} + +impl From for S96 { + #[inline] + fn from(val: S64) -> Self { + Self::new([val.magnitude.0[0]], 0, val.is_positive) + } +} + +impl From for S160 { + #[inline] + fn from(val: i64) -> Self { + Self::new([val.unsigned_abs(), 0], 0, val.is_positive()) + } +} + +impl From for S160 { + #[inline] + fn from(val: u64) -> Self { + Self::new([val, 0], 0, true) + } +} + +impl From for S160 { + #[inline] + fn from(val: S64) -> Self { + Self::new([val.magnitude.0[0], 0], 0, val.is_positive) + } +} + +impl From for S160 { + #[inline] + fn from(val: u128) -> Self { + let lo = val as u64; + let hi = (val >> 64) as u64; + Self::new([lo, hi], 0, true) + } +} + +impl From for S160 { + #[inline] + fn from(val: i128) -> Self { + let is_positive = val.is_positive(); + let mag = val.unsigned_abs(); + let lo = mag as u64; + let hi = (mag >> 64) as u64; + Self::new([lo, hi], 0, is_positive) + } +} + +impl From for S160 { + #[inline] + fn from(val: S128) -> Self { + Self::new([val.magnitude.0[0], val.magnitude.0[1]], 0, 
val.is_positive) + } +} + +impl From for Limbs { + #[inline] + #[allow(unsafe_code)] + fn from(val: S224) -> Self { + assert!( + N == 4, + "From for Limbs only supports N=4, got N={N}" + ); + let lo = val.magnitude_lo(); + let hi = val.magnitude_hi() as u64; + let limbs4 = Limbs::<4>([lo[0], lo[1], lo[2], hi]); + + // SAFETY: Limbs<4> and Limbs have identical layout when N=4 + // (asserted above). + unsafe { (&raw const limbs4).cast::>().read() } + } +} + +impl S160 { + /// Computes the signed difference `a - b` as an `S160`. + #[inline] + pub fn from_diff_u64(a: u64, b: u64) -> Self { + let mag = a.abs_diff(b); + let is_positive = a >= b; + S160::new([mag, 0], 0, is_positive) + } + + /// Creates an `S160` from a `u128` magnitude and sign. + #[inline] + pub fn from_magnitude_u128(mag: u128, is_positive: bool) -> Self { + let lo = mag as u64; + let hi = (mag >> 64) as u64; + S160::new([lo, hi], 0, is_positive) + } + + /// Computes the signed difference `u1 - u2` as an `S160`. + #[inline] + pub fn from_diff_u128(u1: u128, u2: u128) -> Self { + if u1 >= u2 { + S160::from_magnitude_u128(u1 - u2, true) + } else { + S160::from_magnitude_u128(u2 - u1, false) + } + } + + /// Computes `u1 + u2` as an `S160`, handling carry into the hi32 limb. + #[inline] + pub fn from_sum_u128(u1: u128, u2: u128) -> Self { + let u1_lo = u1 as u64; + let u1_hi = (u1 >> 64) as u64; + let u2_lo = u2 as u64; + let u2_hi = (u2 >> 64) as u64; + let (sum_lo, carry0) = u1_lo.overflowing_add(u2_lo); + let (sum_hi1, carry1) = u1_hi.overflowing_add(u2_hi); + let (sum_hi, carry2) = sum_hi1.overflowing_add(u64::from(carry0)); + let carry_out = carry1 || carry2; + S160::new([sum_lo, sum_hi], u32::from(carry_out), true) + } + + /// Computes `u - i` as an `S160`. 
+ #[inline] + pub fn from_u128_minus_i128(u: u128, i: i128) -> Self { + if i >= 0 { + S160::from_diff_u128(u, i as u128) + } else { + let abs_i: u128 = i.unsigned_abs(); + S160::from_sum_u128(u, abs_i) + } + } +} + +impl Default for S160 { + fn default() -> Self { + Self::zero() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn s160_from_diff_u64() { + let d = S160::from_diff_u64(10, 3); + assert!(d.is_positive()); + assert_eq!(d.magnitude_lo()[0], 7); + + let d2 = S160::from_diff_u64(3, 10); + assert!(!d2.is_positive()); + assert_eq!(d2.magnitude_lo()[0], 7); + } + + #[test] + fn s160_addition() { + let a = S160::from(100u64); + let b = S160::from(200u64); + let c = a + b; + assert!(c.is_positive()); + assert_eq!(c.magnitude_lo()[0], 300); + } + + #[test] + fn s160_subtraction() { + let a = S160::from(100u64); + let b = S160::from(200u64); + let c = a - b; + assert!(!c.is_positive()); + assert_eq!(c.magnitude_lo()[0], 100); + } + + #[test] + fn s160_to_signed_bigint() { + let v = S160::new([42, 0], 7, false); + let sb: SignedBigInt<3> = v.to_signed_bigint_nplus1::<3>(); + assert!(!sb.is_positive); + assert_eq!(sb.magnitude.0[0], 42); + assert_eq!(sb.magnitude.0[1], 0); + assert_eq!(sb.magnitude.0[2], 7); + } + + #[test] + fn serialization_roundtrip() { + let val = S160::new([123_456_789, 987_654_321], 42, false); + let mut bytes = Vec::new(); + val.serialize_compressed(&mut bytes).unwrap(); + let restored = S160::deserialize_compressed(&bytes[..]).unwrap(); + assert_eq!(val, restored); + } + + #[test] + fn s160_from_u128_minus_i128() { + let v = S160::from_u128_minus_i128(100, -50); + assert!(v.is_positive()); + assert_eq!(v.magnitude_lo()[0], 150); + + let v2 = S160::from_u128_minus_i128(100, 150); + assert!(!v2.is_positive()); + assert_eq!(v2.magnitude_lo()[0], 50); + } + + #[test] + fn zero_extend() { + let s = S96::from(42u64); + let wide: S160 = SignedBigIntHi32::zero_extend_from(&s); + assert!(wide.is_positive()); + 
assert_eq!(wide.magnitude_lo()[0], 42); + } +} diff --git a/crates/jolt-field/tests/coverage.rs b/crates/jolt-field/tests/coverage.rs new file mode 100644 index 000000000..5b9d2c6c0 --- /dev/null +++ b/crates/jolt-field/tests/coverage.rs @@ -0,0 +1,1058 @@ +//! Targeted tests to improve code coverage across the jolt-field crate. +//! +//! Covers: NaiveAccumulator, WideAccumulator, OptimizedMul blanket impl, +//! Field default methods, SignedBigInt uncovered paths, +//! SignedBigIntHi32 uncovered paths, and macro-generated operator variants. + +use ark_serialize::{CanonicalDeserialize, CanonicalSerialize}; +use ark_std::test_rng; +use jolt_field::signed::*; +use jolt_field::{Field, FieldAccumulator, Fr, Limbs, NaiveAccumulator, OptimizedMul}; +use num_traits::{One, Zero}; + +#[test] +fn naive_accumulator_fmadd() { + let a = ::from_u64(7); + let b = ::from_u64(11); + let c = ::from_u64(3); + let d = ::from_u64(5); + + let mut acc = NaiveAccumulator::::default(); + acc.fmadd(a, b); + acc.fmadd(c, d); + // 7*11 + 3*5 = 77 + 15 = 92 + assert_eq!(acc.reduce(), ::from_u64(92)); +} + +#[test] +fn naive_accumulator_merge() { + let mut acc1 = NaiveAccumulator::::default(); + acc1.fmadd(::from_u64(2), ::from_u64(3)); + + let mut acc2 = NaiveAccumulator::::default(); + acc2.fmadd(::from_u64(4), ::from_u64(5)); + + acc1.merge(acc2); + // 2*3 + 4*5 = 6 + 20 = 26 + assert_eq!(acc1.reduce(), ::from_u64(26)); +} + +#[test] +fn naive_accumulator_reduce_empty() { + let acc = NaiveAccumulator::::default(); + assert!(acc.reduce().is_zero()); +} + +#[test] +fn wide_accumulator_fmadd() { + use jolt_field::WideAccumulator; + + let a = ::from_u64(13); + let b = ::from_u64(17); + + let mut acc = WideAccumulator::default(); + acc.fmadd(a, b); + assert_eq!(acc.reduce(), ::from_u64(13 * 17)); +} + +#[test] +fn wide_accumulator_merge() { + use jolt_field::WideAccumulator; + + let mut acc1 = WideAccumulator::default(); + acc1.fmadd(::from_u64(10), ::from_u64(20)); + + let mut acc2 = 
WideAccumulator::default(); + acc2.fmadd(::from_u64(30), ::from_u64(40)); + + acc1.merge(acc2); + // 10*20 + 30*40 = 200 + 1200 = 1400 + assert_eq!(acc1.reduce(), ::from_u64(1400)); +} + +#[test] +fn wide_accumulator_reduce_empty() { + use jolt_field::WideAccumulator; + + let acc = WideAccumulator::default(); + assert!(acc.reduce().is_zero()); +} + +#[test] +fn wide_accumulator_many_fmadds() { + use jolt_field::WideAccumulator; + + let mut acc = WideAccumulator::default(); + let mut expected = Fr::zero(); + let mut rng = test_rng(); + for _ in 0..500 { + let a: Fr = Field::random(&mut rng); + let b: Fr = Field::random(&mut rng); + acc.fmadd(a, b); + expected += a * b; + } + assert_eq!(acc.reduce(), expected); +} + +#[test] +fn optimized_mul_blanket_impl() { + let mut rng = test_rng(); + let a: Fr = Field::random(&mut rng); + let b: Fr = Field::random(&mut rng); + + // mul_0_optimized: both nonzero + assert_eq!(a.mul_0_optimized(b), a * b); + + // mul_0_optimized: first is zero + assert!(Fr::zero().mul_0_optimized(b).is_zero()); + + // mul_0_optimized: second is zero + assert!(a.mul_0_optimized(Fr::zero()).is_zero()); + + // mul_1_optimized: first is one + assert_eq!(Fr::one().mul_1_optimized(b), b); + + // mul_1_optimized: second is one + assert_eq!(a.mul_1_optimized(Fr::one()), a); + + // mul_1_optimized: neither is one + assert_eq!(a.mul_1_optimized(b), a * b); + + // mul_01_optimized: zero path + assert!(Fr::zero().mul_01_optimized(b).is_zero()); + assert!(a.mul_01_optimized(Fr::zero()).is_zero()); + + // mul_01_optimized: one path + assert_eq!(Fr::one().mul_01_optimized(b), b); + assert_eq!(a.mul_01_optimized(Fr::one()), a); + + // mul_01_optimized: general path + assert_eq!(a.mul_01_optimized(b), a * b); +} + +#[test] +fn field_from_bool_edge() { + assert_eq!(::from_bool(true), Fr::one()); + assert_eq!(::from_bool(false), Fr::zero()); +} + +#[test] +fn field_from_small_types_boundary() { + assert_eq!(::from_u8(0), Fr::zero()); + assert_eq!(::from_u8(255), 
::from_u64(255)); + assert_eq!(::from_u16(0), Fr::zero()); + assert_eq!( + ::from_u16(65535), + ::from_u64(65535) + ); + assert_eq!(::from_u32(0), Fr::zero()); + assert_eq!( + ::from_u32(u32::MAX), + ::from_u64(u32::MAX as u64) + ); +} + +#[test] +fn field_mul_pow_2_boundary() { + let f = ::from_u64(1); + // pow=0 -> f * 1 = f + assert_eq!(::mul_pow_2(&f, 0), f); + // pow=1 -> f * 2 + assert_eq!(::mul_pow_2(&f, 1), ::from_u64(2)); + // pow=64 -> goes through while loop at least once + let result = ::mul_pow_2(&f, 64); + let mut expected = f; + for _ in 0..64 { + expected = expected + expected; + } + assert_eq!(result, expected); +} + +#[test] +#[should_panic(expected = "pow > 255")] +fn field_mul_pow_2_overflow() { + let f = ::from_u64(1); + let _ = ::mul_pow_2(&f, 256); +} + +#[test] +fn signed_bigint_neg() { + let a = S64::from_i64(42); + let b = -a; + assert!(!b.is_positive); + assert_eq!(b.magnitude_as_u64(), 42); + + let c = -b; + assert!(c.is_positive); +} + +#[test] +fn signed_bigint_from_u128() { + let v = 0xDEAD_BEEF_CAFE_BABEu128; + let s = S128::from_u128(v); + assert!(s.is_positive); + assert_eq!(s.magnitude_as_u128(), v); +} + +#[test] +fn signed_bigint_from_i128_positive() { + let v = 123_456_789_012_345_678i128; + let s = S128::from_i128(v); + assert!(s.is_positive); + assert_eq!(s.to_i128(), Some(v)); +} + +#[test] +fn signed_bigint_from_i128_negative() { + let v = -123_456_789_012_345_678i128; + let s = S128::from_i128(v); + assert!(!s.is_positive); + assert_eq!(s.to_i128(), Some(v)); +} + +#[test] +fn signed_bigint_from_u128_trait() { + let v = 42u128; + let s: S128 = v.into(); + assert!(s.is_positive); + assert_eq!(s.magnitude_as_u128(), 42); +} + +#[test] +fn signed_bigint_from_i128_trait() { + let s: S128 = (-99i128).into(); + assert!(!s.is_positive); + assert_eq!(s.to_i128(), Some(-99)); +} + +#[test] +fn signed_bigint_sub_trunc() { + // Same sign, |self| > |rhs| + let a = S128::from_i128(100); + let b = S128::from_i128(30); + let c: S128 = 
a.sub_trunc::<2>(&b); + assert_eq!(c.to_i128(), Some(70)); + + // Same sign, |self| < |rhs| => sign flips + let d: S128 = b.sub_trunc::<2>(&a); + assert_eq!(d.to_i128(), Some(-70)); + + // Different signs: positive - negative = add magnitudes + let e = S128::from_i128(50); + let f = S128::from_i128(-30); + let g: S128 = e.sub_trunc::<2>(&f); + assert_eq!(g.to_i128(), Some(80)); +} + +#[test] +fn signed_bigint_sub_trunc_mixed() { + // Same sign, |self| > |rhs| + let a = S128::from_i128(100); + let b = S64::from_i64(30); + let c: S128 = a.sub_trunc_mixed::<1, 2>(&b); + assert_eq!(c.to_i128(), Some(70)); + + // Same sign, |self| < |rhs| + let d = S64::from_i64(30); + let e = S128::from_i128(100); + let f: S128 = d.sub_trunc_mixed::<2, 2>(&e); + assert_eq!(f.to_i128(), Some(-70)); + + // Different signs + let g = S128::from_i128(50); + let h = S64::from_i64(-20); + let i: S128 = g.sub_trunc_mixed::<1, 2>(&h); + assert_eq!(i.to_i128(), Some(70)); +} + +#[test] +fn signed_bigint_mul_trunc_widths() { + // S64 * S128 -> S128 + let a = S64::from_i64(-7); + let b = S128::from_i128(11); + let c: S128 = a.mul_trunc::<2, 2>(&b); + assert_eq!(c.to_i128(), Some(-77)); + + // S128 * S128 -> S256 + let d = S128::from_i128(1_000_000); + let e = S128::from_i128(-2_000_000); + let f: S256 = d.mul_trunc::<2, 4>(&e); + assert!(!f.is_positive); +} + +#[test] +fn signed_bigint_s256_serialization() { + let val = S256::new([1, 2, 3, 4], false); + let mut bytes = Vec::new(); + val.serialize_compressed(&mut bytes).unwrap(); + let restored = S256::deserialize_compressed(&bytes[..]).unwrap(); + assert_eq!(val, restored); +} + +#[test] +fn signed_bigint_s192_serialization() { + let val = S192::new([u64::MAX, 0, 42], true); + let mut bytes = Vec::new(); + val.serialize_compressed(&mut bytes).unwrap(); + let restored = S192::deserialize_compressed(&bytes[..]).unwrap(); + assert_eq!(val, restored); +} + +#[test] +fn signed_bigint_from_u64_mul_i64() { + let r = S128::from_u64_mul_i64(100, -7); + 
assert_eq!(r.to_i128(), Some(-700)); + + let r2 = S128::from_u64_mul_i64(100, 7); + assert_eq!(r2.to_i128(), Some(700)); +} + +#[test] +fn signed_bigint_from_i64_mul_u64() { + let r = S128::from_i64_mul_u64(-3, 100); + assert_eq!(r.to_i128(), Some(-300)); +} + +#[test] +fn signed_bigint_ordering_negative_magnitudes() { + // Both negative: larger magnitude = smaller value + let a = S64::from_i64(-10); + let b = S64::from_i64(-5); + assert!(a < b); + + // Both positive: larger magnitude = larger value + let c = S64::from_i64(10); + let d = S64::from_i64(5); + assert!(c > d); +} + +#[test] +fn s96_arithmetic() { + let a = S96::from(10i64); + let b = S96::from(3i64); + + let sum = a + b; + assert!(sum.is_positive()); + assert_eq!(sum.magnitude_lo()[0], 13); + + let diff = a - b; + assert!(diff.is_positive()); + assert_eq!(diff.magnitude_lo()[0], 7); + + let prod = a * b; + assert!(prod.is_positive()); + assert_eq!(prod.magnitude_lo()[0], 30); +} + +#[test] +fn s96_from_negative() { + let a = S96::from(-5i64); + assert!(!a.is_positive()); + assert_eq!(a.magnitude_lo()[0], 5); +} + +#[test] +fn s96_from_s64() { + let s = S64::from_i64(-42); + let wide = S96::from(s); + assert!(!wide.is_positive()); + assert_eq!(wide.magnitude_lo()[0], 42); +} + +#[test] +fn s224_operations() { + let a = S224::new([1, 0, 0], 0, true); + let b = S224::new([2, 0, 0], 0, true); + let sum = a + b; + assert!(sum.is_positive()); + assert_eq!(sum.magnitude_lo()[0], 3); + + let diff = a - b; + assert!(!diff.is_positive()); + assert_eq!(diff.magnitude_lo()[0], 1); + + let prod = a * b; + assert!(prod.is_positive()); + assert_eq!(prod.magnitude_lo()[0], 2); +} + +#[test] +fn s224_to_limbs4() { + let v = S224::new([0xAAAA, 0xBBBB, 0xCCCC], 0xDD, true); + let limbs: Limbs<4> = v.into(); + assert_eq!(limbs.0[0], 0xAAAA); + assert_eq!(limbs.0[1], 0xBBBB); + assert_eq!(limbs.0[2], 0xCCCC); + assert_eq!(limbs.0[3], 0xDD); +} + +#[test] +fn magnitude_as_limbs_nplus1_s96() { + let v = S96::new([42], 7, 
true); + let limbs: Limbs<2> = v.magnitude_as_limbs_nplus1::<2>(); + assert_eq!(limbs.0[0], 42); + assert_eq!(limbs.0[1], 7); +} + +#[test] +fn magnitude_as_limbs_nplus1_s160() { + let v = S160::new([1, 2], 3, false); + let limbs: Limbs<3> = v.magnitude_as_limbs_nplus1::<3>(); + assert_eq!(limbs.0[0], 1); + assert_eq!(limbs.0[1], 2); + assert_eq!(limbs.0[2], 3); +} + +#[test] +fn magnitude_as_limbs_nplus1_s224() { + let v = S224::new([10, 20, 30], 40, true); + let limbs: Limbs<4> = v.magnitude_as_limbs_nplus1::<4>(); + assert_eq!(limbs.0[0], 10); + assert_eq!(limbs.0[1], 20); + assert_eq!(limbs.0[2], 30); + assert_eq!(limbs.0[3], 40); +} + +#[test] +fn zero_extend_from_s96_to_s160() { + let s = S96::new([42], 7, false); + let wide: S160 = SignedBigIntHi32::zero_extend_from(&s); + assert!(!wide.is_positive()); + // When N > M, hi32 is placed into limb M as u64, new hi32 = 0 + assert_eq!(wide.magnitude_lo()[0], 42); + assert_eq!(wide.magnitude_lo()[1], 7); + assert_eq!(wide.magnitude_hi(), 0); +} + +#[test] +fn zero_extend_from_s96_to_s96() { + // N == M case + let s = S96::new([42], 7, true); + let same: S96 = SignedBigIntHi32::zero_extend_from(&s); + assert_eq!(same.magnitude_lo()[0], 42); + assert_eq!(same.magnitude_hi(), 7); + assert!(same.is_positive()); +} + +#[test] +fn s160_ordering() { + let a = S160::from(100u64); + let b = S160::from(200u64); + assert!(a < b); + + let c = S160::new([0, 0], 0, true); // positive zero + let d = S160::new([0, 0], 0, false); // negative zero + assert_eq!(c.cmp(&d), std::cmp::Ordering::Equal); + + // Positive > Negative + let pos = S160::from(1u64); + let neg = S160::new([1, 0], 0, false); + assert!(pos > neg); +} + +#[test] +fn s160_ordering_negative_magnitudes() { + // Both negative: larger magnitude = smaller value + let a = S160::new([10, 0], 0, false); + let b = S160::new([5, 0], 0, false); + assert!(a < b); +} + +#[test] +fn s160_ordering_hi32_tiebreak() { + let a = S160::new([0, 0], 1, true); + let b = S160::new([0, 0], 
2, true); + assert!(a < b); +} + +#[test] +fn s160_from_sum_u128() { + let a = u128::MAX / 2; + let b = u128::MAX / 2; + let s = S160::from_sum_u128(a, b); + assert!(s.is_positive()); + // No overflow into hi32 for this case + assert_eq!(s.magnitude_hi(), 0); + + // Force carry into hi32 + let s2 = S160::from_sum_u128(u128::MAX, 1); + assert!(s2.is_positive()); + assert_eq!(s2.magnitude_hi(), 1); + assert_eq!(s2.magnitude_lo()[0], 0); + assert_eq!(s2.magnitude_lo()[1], 0); +} + +#[test] +fn s160_from_diff_u128() { + let a = S160::from_diff_u128(100, 200); + assert!(!a.is_positive()); + assert_eq!(a.magnitude_lo()[0], 100); + + let b = S160::from_diff_u128(200, 100); + assert!(b.is_positive()); + assert_eq!(b.magnitude_lo()[0], 100); +} + +#[test] +fn s160_from_magnitude_u128() { + let s = S160::from_magnitude_u128(0xDEAD_BEEF_CAFE_BABEu128, false); + assert!(!s.is_positive()); + assert_eq!(s.magnitude_lo()[0], 0xDEAD_BEEF_CAFE_BABEu128 as u64); + assert_eq!( + s.magnitude_lo()[1], + (0xDEAD_BEEF_CAFE_BABEu128 >> 64) as u64 + ); +} + +#[test] +fn s160_from_u128_minus_i128_negative_i() { + // u - (-i) = u + |i| (sum path) + let v = S160::from_u128_minus_i128(100, -50); + assert!(v.is_positive()); + assert_eq!(v.magnitude_lo()[0], 150); +} + +#[test] +fn s160_from_u128_minus_i128_positive_i_larger() { + // u - i where i > u (diff path, negative result) + let v = S160::from_u128_minus_i128(10, 100); + assert!(!v.is_positive()); + assert_eq!(v.magnitude_lo()[0], 90); +} + +#[test] +fn s160_serialization_roundtrip() { + let val = S160::new([u64::MAX, 123_456], 0xABCD, false); + let mut bytes = Vec::new(); + val.serialize_compressed(&mut bytes).unwrap(); + let restored = S160::deserialize_compressed(&bytes[..]).unwrap(); + assert_eq!(val, restored); +} + +#[test] +fn s96_serialization_roundtrip() { + let val = S96::new([42], 7, true); + let mut bytes = Vec::new(); + val.serialize_compressed(&mut bytes).unwrap(); + let restored = 
S96::deserialize_compressed(&bytes[..]).unwrap(); + assert_eq!(val, restored); +} + +#[test] +fn s224_serialization_roundtrip() { + let val = S224::new([1, 2, 3], 4, false); + let mut bytes = Vec::new(); + val.serialize_compressed(&mut bytes).unwrap(); + let restored = S224::deserialize_compressed(&bytes[..]).unwrap(); + assert_eq!(val, restored); +} + +#[test] +fn signed_bigint_hi32_neg() { + let a = S160::from(42u64); + let b = -a; + assert!(!b.is_positive()); + assert_eq!(b.magnitude_lo()[0], 42); + + // Neg for &SignedBigIntHi32 + let c = -(&a); + assert!(!c.is_positive()); + assert_eq!(c.magnitude_lo()[0], 42); +} + +#[test] +fn signed_bigint_hi32_one() { + let one = S96::one(); + assert!(one.is_positive()); + assert_eq!(one.magnitude_lo()[0], 1); + assert_eq!(one.magnitude_hi(), 0); +} + +#[test] +fn signed_bigint_hi32_is_zero() { + let z = S160::zero(); + assert!(z.is_zero()); + + let nz = S160::from(1u64); + assert!(!nz.is_zero()); +} + +#[test] +fn s160_from_i128() { + let pos: S160 = 42i128.into(); + assert!(pos.is_positive()); + assert_eq!(pos.magnitude_lo()[0], 42); + + let neg: S160 = (-42i128).into(); + assert!(!neg.is_positive()); + assert_eq!(neg.magnitude_lo()[0], 42); +} + +#[test] +fn s160_from_u128() { + let v: S160 = 0xDEAD_BEEF_CAFE_BABEu128.into(); + assert!(v.is_positive()); + assert_eq!(v.magnitude_lo()[0], 0xDEAD_BEEF_CAFE_BABEu128 as u64); +} + +#[test] +fn s160_from_s128() { + let s = S128::from_i128(-999); + let wide = S160::from(s); + assert!(!wide.is_positive()); + assert_eq!(wide.magnitude_lo()[0], 999); +} + +#[test] +#[allow(clippy::op_ref)] +fn signed_bigint_operator_variants() { + let a = S64::from_i64(10); + let b = S64::from_i64(3); + + // val-val + let _ = a + b; + let _ = a - b; + let _ = a * b; + + // val-ref + let _ = a + &b; + let _ = a - &b; + let _ = a * &b; + + // ref-ref + let _ = &a + &b; + let _ = &a - &b; + let _ = &a * &b; + + // OpAssign-val + let mut c = a; + c += b; + assert_eq!(c, S64::from_i64(13)); + c -= b; 
+ assert_eq!(c, S64::from_i64(10)); + c *= b; + assert_eq!(c, S64::from_i64(30)); + + // OpAssign-ref + let mut d = a; + d += &b; + assert_eq!(d, S64::from_i64(13)); + d -= &b; + assert_eq!(d, S64::from_i64(10)); + d *= &b; + assert_eq!(d, S64::from_i64(30)); +} + +#[test] +#[allow(clippy::op_ref)] +fn signed_bigint_hi32_operator_variants() { + let a = S160::from(10u64); + let b = S160::from(3u64); + + // val-val + let _ = a + b; + let _ = a - b; + let _ = a * b; + + // val-ref + let _ = a + &b; + let _ = a - &b; + let _ = a * &b; + + // ref-ref + let _ = &a + &b; + let _ = &a - &b; + let _ = &a * &b; + + // OpAssign-val + let mut c = a; + c += b; + assert!(c.is_positive()); + assert_eq!(c.magnitude_lo()[0], 13); + c -= b; + assert_eq!(c.magnitude_lo()[0], 10); + c *= b; + assert_eq!(c.magnitude_lo()[0], 30); + + // OpAssign-ref + let mut d = a; + d += &b; + assert_eq!(d.magnitude_lo()[0], 13); + d -= &b; + assert_eq!(d.magnitude_lo()[0], 10); + d *= &b; + assert_eq!(d.magnitude_lo()[0], 30); +} + +#[test] +fn s96_mul_magnitudes_n1() { + // N=1 specialization: single lo limb + hi32 + let a = S96::new([u64::MAX], 0, true); + let b = S96::new([2], 0, true); + let prod = a * b; + // u64::MAX * 2 = 0x1_FFFF_FFFE, truncated to 96 bits + assert!(prod.is_positive()); +} + +#[test] +fn s160_mul_magnitudes_n2() { + // N=2 specialization + let a = S160::new([3, 0], 0, true); + let b = S160::new([7, 0], 0, true); + let prod = a * b; + assert_eq!(prod.magnitude_lo()[0], 21); + assert!(prod.is_positive()); +} + +#[test] +fn s224_mul_magnitudes_n3_general() { + // N=3: general path (N >= 3) + let a = S224::new([2, 0, 0], 0, true); + let b = S224::new([3, 0, 0], 0, true); + let prod = a * b; + assert_eq!(prod.magnitude_lo()[0], 6); + assert!(prod.is_positive()); +} + +#[test] +fn s160_sub_smaller_from_larger() { + // Tests the sign-flip path in sub_assign_in_place (via add_assign_in_place with neg) + let a = S160::from(3u64); + let b = S160::from(10u64); + let c = a - b; + 
assert!(!c.is_positive()); + assert_eq!(c.magnitude_lo()[0], 7); +} + +#[test] +fn s160_add_opposite_signs_self_larger() { + let a = S160::new([10, 0], 0, true); + let b = S160::new([3, 0], 0, false); + let c = a + b; + assert!(c.is_positive()); + assert_eq!(c.magnitude_lo()[0], 7); +} + +#[test] +fn s160_add_opposite_signs_rhs_larger() { + let a = S160::new([3, 0], 0, true); + let b = S160::new([10, 0], 0, false); + let c = a + b; + assert!(!c.is_positive()); + assert_eq!(c.magnitude_lo()[0], 7); +} + +#[test] +fn s160_mul_mixed_signs() { + let a = S160::new([5, 0], 0, true); + let b = S160::new([3, 0], 0, false); + let prod = a * b; + assert!(!prod.is_positive()); + assert_eq!(prod.magnitude_lo()[0], 15); + + let prod2 = b * b; + assert!(prod2.is_positive()); + assert_eq!(prod2.magnitude_lo()[0], 9); +} + +#[test] +fn s160_from_i64() { + let v: S160 = (-7i64).into(); + assert!(!v.is_positive()); + assert_eq!(v.magnitude_lo()[0], 7); + assert_eq!(v.magnitude_lo()[1], 0); + assert_eq!(v.magnitude_hi(), 0); +} + +#[test] +fn s160_from_u64() { + let v: S160 = 42u64.into(); + assert!(v.is_positive()); + assert_eq!(v.magnitude_lo()[0], 42); +} + +#[test] +fn s160_from_s64() { + let s = S64::from_i64(-99); + let v = S160::from(s); + assert!(!v.is_positive()); + assert_eq!(v.magnitude_lo()[0], 99); +} + +#[test] +fn signed_bigint_add_opposite_signs_self_smaller() { + let a = S64::from_i64(3); + let b = S64::from_i64(-10); + let c = a + b; + assert!(!c.is_positive); + assert_eq!(c.magnitude_as_u64(), 7); +} + +#[test] +fn signed_bigint_sub_opposite_signs() { + // positive - negative = add magnitudes + let a = S64::from_i64(5); + let b = S64::from_i64(-3); + let c = a - b; + assert!(c.is_positive); + assert_eq!(c.magnitude_as_u64(), 8); +} + +#[test] +fn signed_bigint_sub_same_sign_smaller_magnitude() { + // Same sign, |self| < |rhs| => sign flips + let a = S64::from_i64(3); + let b = S64::from_i64(10); + let c = a - b; + assert!(!c.is_positive); + 
assert_eq!(c.magnitude_as_u64(), 7); +} + +#[test] +fn signed_bigint_add_trunc_mixed_opposite_signs_self_smaller() { + let a = S64::from_i64(3); + let b = S128::from_i128(-100); + let c: S128 = a.add_trunc_mixed::<2, 2>(&b); + assert_eq!(c.to_i128(), Some(-97)); +} + +#[test] +fn signed_bigint_add_trunc_mixed_opposite_signs_self_larger() { + let a = S128::from_i128(100); + let b = S64::from_i64(-3); + let c: S128 = a.add_trunc_mixed::<1, 2>(&b); + assert_eq!(c.to_i128(), Some(97)); +} + +#[test] +fn s96_add_with_carry_into_hi32() { + let a = S96::new([u64::MAX], 0, true); + let b = S96::new([1], 0, true); + let c = a + b; + assert!(c.is_positive()); + assert_eq!(c.magnitude_lo()[0], 0); + assert_eq!(c.magnitude_hi(), 1); +} + +#[test] +fn s96_sub_with_borrow() { + let a = S96::new([0], 1, true); + let b = S96::new([1], 0, true); + let c = a - b; + assert!(c.is_positive()); + assert_eq!(c.magnitude_lo()[0], u64::MAX); + assert_eq!(c.magnitude_hi(), 0); +} + +#[test] +fn signed_bigint_hi32_default() { + let d = S160::default(); + assert!(d.is_zero()); + assert!(d.is_positive()); +} + +#[test] +fn signed_bigint_zero_extend_s64_to_s256() { + let s = S64::from_i64(-7); + let wide: S256 = SignedBigInt::zero_extend_from(&s); + assert!(!wide.is_positive); + assert_eq!(wide.magnitude.0[0], 7); + assert_eq!(wide.magnitude.0[1], 0); + assert_eq!(wide.magnitude.0[2], 0); + assert_eq!(wide.magnitude.0[3], 0); +} + +#[test] +fn signed_bigint_default_is_zero() { + let d = S64::default(); + assert!(d.is_zero()); +} + +#[test] +fn signed_bigint_one() { + let o = S64::one(); + assert!(o.is_positive); + assert_eq!(o.magnitude_as_u64(), 1); +} + +#[test] +fn signed_bigint_accessors() { + let s = S128::from_i128(-42); + assert!(!s.sign()); + assert_eq!(s.magnitude_slice(), &[42, 0]); + assert_eq!(s.magnitude_limbs(), [42, 0]); + let _ = s.as_magnitude(); +} + +#[test] +fn signed_bigint_negate() { + let s = S64::from_i64(10); + let n = s.negate(); + assert!(!n.is_positive); + 
assert_eq!(n.magnitude_as_u64(), 10); +} + +#[test] +fn fr_from_bool() { + let t: Fr = From::from(true); + let f: Fr = From::from(false); + assert_eq!(t, Fr::one()); + assert_eq!(f, Fr::zero()); +} + +#[test] +fn fr_from_small_types() { + // Exercise the From for Fr trait impls in bn254.rs + let from_u8: Fr = >::from(42); + assert_eq!(from_u8, Fr::from_u64(42)); + + let from_u16: Fr = >::from(1000); + assert_eq!(from_u16, Fr::from_u64(1000)); + + let from_u32: Fr = >::from(100_000); + assert_eq!(from_u32, Fr::from_u64(100_000)); + + let from_u64: Fr = >::from(123_456_789); + assert_eq!(from_u64, Fr::from_u64(123_456_789)); + + let from_i64: Fr = >::from(-42); + assert_eq!(from_i64, Fr::from_i64(-42)); + + let from_u128: Fr = >::from(999_999_999_999); + assert_eq!(from_u128, Fr::from_u128(999_999_999_999)); + + let from_i128: Fr = >::from(-999); + assert_eq!(from_i128, Fr::from_i128(-999)); + + let from_bool: Fr = >::from(true); + assert_eq!(from_bool, Fr::one()); + let from_bool_f: Fr = >::from(false); + assert_eq!(from_bool_f, Fr::zero()); +} + +#[test] +#[allow(clippy::op_ref)] +fn fr_ref_arithmetic() { + let a = Fr::from_u64(7); + let b = Fr::from_u64(11); + + // val op &ref + let sum_vr = a + &b; + assert_eq!(sum_vr, Fr::from_u64(18)); + let diff_vr = a - &b; + assert_eq!(diff_vr, Fr::from_i64(-4)); + let prod_vr = a * &b; + assert_eq!(prod_vr, Fr::from_u64(77)); + let div_vr = a / &b; + assert_eq!(div_vr * b, a); + + // &ref op val + let sum_rv = &a + b; + assert_eq!(sum_rv, Fr::from_u64(18)); + let diff_rv = &a - b; + assert_eq!(diff_rv, Fr::from_i64(-4)); + let prod_rv = &a * b; + assert_eq!(prod_rv, Fr::from_u64(77)); + let div_rv = &a / b; + assert_eq!(div_rv * b, a); + + // &ref op &ref + let sum_rr = &a + &b; + assert_eq!(sum_rr, Fr::from_u64(18)); + let diff_rr = &a - &b; + assert_eq!(diff_rr, Fr::from_i64(-4)); + let prod_rr = &a * &b; + assert_eq!(prod_rr, Fr::from_u64(77)); + let div_rr = &a / &b; + assert_eq!(div_rr * b, a); +} + +#[test] +fn 
fr_neg() { + let a = Fr::from_u64(42); + let neg_a = -a; + assert_eq!(a + neg_a, Fr::zero()); + + let neg_zero = -Fr::zero(); + assert_eq!(neg_zero, Fr::zero()); +} + +#[test] +fn fr_inner_roundtrip() { + // Test Fr(ark_bn254::Fr) -> ark_bn254::Fr conversion (inner type) + let a = Fr::from_u64(12345); + let bytes = a.to_bytes(); + let b = Fr::from_bytes(&bytes); + assert_eq!(a, b); +} + +#[test] +fn fr_mul_assign_sub_assign() { + let mut a = Fr::from_u64(10); + a -= Fr::from_u64(3); + assert_eq!(a, Fr::from_u64(7)); + + a *= Fr::from_u64(6); + assert_eq!(a, Fr::from_u64(42)); + + a += Fr::from_u64(8); + assert_eq!(a, Fr::from_u64(50)); +} + +#[test] +fn fr_sum_and_product_iterators() { + let vals: Vec = (1..=5).map(Fr::from_u64).collect(); + let sum: Fr = vals.iter().copied().sum(); + assert_eq!(sum, Fr::from_u64(15)); + + let sum_ref: Fr = vals.iter().sum(); + assert_eq!(sum_ref, Fr::from_u64(15)); + + let prod: Fr = vals.iter().copied().product(); + assert_eq!(prod, Fr::from_u64(120)); + + let prod_ref: Fr = vals.iter().product(); + assert_eq!(prod_ref, Fr::from_u64(120)); +} + +#[test] +fn wide_accumulator_reduce_matches_field() { + use jolt_field::WideAccumulator; + let mut rng = test_rng(); + + let mut acc = WideAccumulator::default(); + let mut expected = Fr::zero(); + + for _ in 0..200 { + let a = Fr::random(&mut rng); + let b = Fr::random(&mut rng); + acc.fmadd(a, b); + expected += a * b; + } + assert_eq!(acc.reduce(), expected); +} + +#[test] +fn wide_accumulator_merge_reduce() { + use jolt_field::WideAccumulator; + let mut rng = test_rng(); + + let mut acc1 = WideAccumulator::default(); + let mut acc2 = WideAccumulator::default(); + let mut expected = Fr::zero(); + + for _ in 0..100 { + let a = Fr::random(&mut rng); + let b = Fr::random(&mut rng); + acc1.fmadd(a, b); + expected += a * b; + } + for _ in 0..100 { + let a = Fr::random(&mut rng); + let b = Fr::random(&mut rng); + acc2.fmadd(a, b); + expected += a * b; + } + acc1.merge(acc2); + 
assert_eq!(acc1.reduce(), expected); +} diff --git a/crates/jolt-field/tests/field_operations.rs b/crates/jolt-field/tests/field_operations.rs new file mode 100644 index 000000000..820d9c89e --- /dev/null +++ b/crates/jolt-field/tests/field_operations.rs @@ -0,0 +1,239 @@ +use ark_std::rand::Rng; +use ark_std::{test_rng, One, Zero}; +use jolt_field::Field; +use jolt_field::Fr; +use rand_chacha::rand_core::RngCore; + +#[test] +fn implicit_montgomery_conversion() { + let mut rng = test_rng(); + + for _ in 0..256 { + let x = rng.next_u64(); + assert_eq!( + ::from_u64(x), + Fr::one() * ::from_u64(x) + ); + } + + for _ in 0..256 { + let x = rng.next_u64(); + let y: Fr = Field::random(&mut rng); + assert_eq!( + y * ::from_u64(x), + y * ::from_u64(x) + ); + } +} + +#[test] +fn field_arithmetic() { + let mut rng = test_rng(); + + let x = ::from_u64(rng.next_u64()); + let y = ::from_u64(rng.next_u64()); + + let sum = x + y; + assert_eq!(sum, y + x); + + let product = x * y; + assert_eq!(product, y * x); + + let diff = x - y; + assert_eq!(diff + y, x); + + if !y.is_zero() { + let quotient = x / y; + assert_eq!(quotient * y, x); + } +} + +#[test] +fn field_conversions() { + let mut rng = test_rng(); + + assert_eq!(::from_bool(true), Fr::one()); + assert_eq!(::from_bool(false), Fr::zero()); + + for _ in 0..100 { + let val = rng.gen::(); + let field_elem = ::from_u8(val); + assert_eq!(field_elem, ::from_u64(val as u64)); + } + + for _ in 0..100 { + let val = rng.gen::(); + let field_elem = ::from_u16(val); + assert_eq!(field_elem, ::from_u64(val as u64)); + } + + for _ in 0..100 { + let val = rng.gen::(); + let field_elem = ::from_u32(val); + assert_eq!(field_elem, ::from_u64(val as u64)); + } + + for _ in 0..100 { + let val = rng.gen::(); + let field_elem = ::from_u128(val); + assert!(!field_elem.is_zero() || val == 0); + } +} + +#[test] +fn bytes_conversion() { + let mut rng = test_rng(); + + for &len in &[1, 8, 16, 32, 48, 64] { + let mut bytes = vec![0u8; len]; + 
rng.fill_bytes(&mut bytes); + let _field_elem = ::from_bytes(&bytes); + } +} + +#[test] +fn signed_conversions() { + let mut rng = test_rng(); + + for _ in 0..100 { + let val = rng.gen::(); + let field_elem = ::from_i64(val); + + if val >= 0 { + assert_eq!(field_elem, ::from_u64(val as u64)); + } else { + assert_eq!(field_elem, -::from_u64(val.unsigned_abs())); + } + } + + for _ in 0..100 { + let val = rng.gen::(); + let field_elem = ::from_i128(val); + + if val >= 0 { + assert_eq!(field_elem, ::from_u128(val as u128)); + } else { + assert_eq!(field_elem, -::from_u128(val.unsigned_abs())); + } + } +} + +#[test] +fn mul_u64_method() { + let mut rng = test_rng(); + + for _ in 0..100 { + let field_elem: Fr = Field::random(&mut rng); + let n = rng.next_u64(); + + // Use UFCS to call trait method (arkworks has inherent mul_u64 with different signature) + let result = ::mul_u64(&field_elem, n); + let expected = field_elem * ::from_u64(n); + assert_eq!(result, expected); + } +} + +#[test] +fn mul_i64_method() { + let mut rng = test_rng(); + + for _ in 0..100 { + let field_elem: Fr = Field::random(&mut rng); + let n = rng.gen::(); + + let result = ::mul_i64(&field_elem, n); + let expected = field_elem * ::from_i64(n); + assert_eq!(result, expected); + } +} + +#[test] +fn mul_u128_method() { + let mut rng = test_rng(); + + for _ in 0..100 { + let field_elem: Fr = Field::random(&mut rng); + let n = rng.gen::(); + + let result = ::mul_u128(&field_elem, n); + let expected = field_elem * ::from_u128(n); + assert_eq!(result, expected); + } +} + +#[test] +fn mul_i128_method() { + let mut rng = test_rng(); + + for _ in 0..100 { + let field_elem: Fr = Field::random(&mut rng); + let n = rng.gen::(); + + let result = ::mul_i128(&field_elem, n); + let expected = field_elem * ::from_i128(n); + assert_eq!(result, expected); + } +} + +#[test] +fn mul_pow_2_method() { + let mut rng = test_rng(); + + for _ in 0..10 { + let field_elem: Fr = Field::random(&mut rng); + + for pow in [0, 1, 2, 
7, 16, 32, 63, 64, 127, 128, 255] { + let result = ::mul_pow_2(&field_elem, pow); + let mut expected = field_elem; + for _ in 0..pow { + expected = expected + expected; + } + assert_eq!(result, expected, "Failed for pow={pow}"); + } + } +} + +#[test] +fn mul_by_small_values() { + let mut rng = test_rng(); + + for _ in 0..100 { + let field_elem: Fr = Field::random(&mut rng); + let small_val = rng.gen_range(0u64..1000); + + let result1 = field_elem * ::from_u64(small_val); + + let mut result2 = Fr::zero(); + for _ in 0..small_val { + result2 += field_elem; + } + + assert_eq!(result1, result2); + } +} + +#[test] +fn special_values() { + let mut rng = test_rng(); + let field_elem: Fr = Field::random(&mut rng); + + assert_eq!(field_elem * ::from_u64(0), Fr::zero()); + assert_eq!(field_elem * ::from_u64(1), field_elem); + assert!((Fr::zero() * ::from_u64(rng.next_u64())).is_zero()); + + assert_eq!(::mul_u64(&field_elem, 0), Fr::zero()); + assert_eq!(::mul_u64(&field_elem, 1), field_elem); + assert_eq!(::mul_u64(&Fr::zero(), 42), Fr::zero()); +} + +#[test] +fn to_u64_conversion() { + for i in 0..1000u64 { + let field_elem = ::from_u64(i); + assert_eq!(field_elem.to_u64(), Some(i)); + } + + let mut rng = test_rng(); + let large_field: Fr = Field::random(&mut rng); + let _ = large_field.to_u64(); +} diff --git a/crates/jolt-instructions/Cargo.toml b/crates/jolt-instructions/Cargo.toml new file mode 100644 index 000000000..5a3fa0ade --- /dev/null +++ b/crates/jolt-instructions/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "jolt-instructions" +version = "0.1.0" +authors = ["Jolt Contributors"] +edition = "2021" +license = "MIT OR Apache-2.0" +description = "RISC-V instruction set and lookup tables for the Jolt zkVM" +repository = "https://github.com/a16z/jolt" +keywords = ["SNARK", "zkvm", "risc-v", "lookup-tables"] +categories = ["cryptography"] + +[lints] +workspace = true + +[dependencies] +jolt-field = { path = "../jolt-field" } +serde = { workspace = true, features = 
["derive"] } + +[dev-dependencies] +rand = { workspace = true } diff --git a/crates/jolt-instructions/README.md b/crates/jolt-instructions/README.md new file mode 100644 index 000000000..c9db98bd2 --- /dev/null +++ b/crates/jolt-instructions/README.md @@ -0,0 +1,56 @@ +# jolt-instructions + +RISC-V instruction set and lookup tables for the Jolt zkVM. + +Part of the [Jolt](https://github.com/a16z/jolt) zkVM. + +## Overview + +This crate defines the instruction abstraction layer for the Jolt lookup argument. Each RISC-V instruction is decomposed into lookup queries against small tables, which are then verified via the Twist/Shout protocol. The crate covers the full RV64IMAC instruction set plus virtual instructions for operations that require multi-step decomposition (shifts, rotates, division validation, byte manipulation, SHA XOR-rotate). + +105 instructions map to 41 lookup tables, each with a prefix/suffix sparse-dense MLE decomposition that enables sub-linear evaluation during sumcheck. + +## Public API + +### Core Traits + +- **`Instruction: Flags`** -- A RISC-V instruction. Methods: `opcode()`, `name()`, `execute(x, y) -> u64`, `lookup_table() -> Option`. +- **`LookupTable`** -- A small evaluation table. Methods: `materialize_entry(index) -> u64`, `evaluate_mle(r) -> F`. +- **`Flags`** -- Static flag configuration. Methods: `circuit_flags() -> [bool; NUM_CIRCUIT_FLAGS]`, `instruction_flags() -> [bool; NUM_INSTRUCTION_FLAGS]`. +- **`PrefixSuffixDecomposition`** -- Sub-linear MLE evaluation via `table_mle(r) = Sum prefix_i(r_high) * suffix_i(r_low)`. +- **`ChallengeOps`** / **`FieldOps`** -- Arithmetic bounds for challenge-field operations in prefix/suffix evaluation. + +### Types + +- **`LookupTableKind`** -- `#[repr(u8)]` enum identifying one of 41 distinct lookup table types. Compact serialization for wire format. +- **`LookupTables`** -- Runtime dispatch enum over all concrete table implementations. Constructed from `LookupTableKind` via `From`. 
The `XLEN` const generic selects word size (8 for tests, 64 for production). +- **`LookupBits`** -- Compact 17-byte bitvector for lookup index substrings (prefix/suffix decomposition). +- **`CircuitFlags`** -- R1CS-relevant boolean flags (14 variants: `AddOperands`, `Load`, `Store`, `Jump`, etc.). +- **`InstructionFlags`** -- Non-R1CS flags for witness generation (7 variants: `LeftOperandIsPC`, `RightOperandIsImm`, `Branch`, etc.). +- **`JoltInstructionSet`** -- Registry of all 105 RV64IMAC instructions with opcode-indexed dispatch. +- **`Prefixes`** / **`Suffixes`** -- Enum dispatch over 46 prefix and 13 suffix polynomial types. + +### Utilities + +- **`interleave_bits`** / **`uninterleave_bits`** -- Bit-interleaving (Morton/Z-order) for two-operand lookup indices. +- **`ALL_PREFIXES`** -- Const array of all 46 prefix variants for safe iteration. +- **`NUM_CIRCUIT_FLAGS`** / **`NUM_INSTRUCTION_FLAGS`** -- Constant counts for flag arrays. + +### Modules + +- **`rv`** -- Concrete RISC-V instruction implementations (arithmetic, arithmetic_w, branch, compare, jump, load, logic, shift, shift_w, store, system). +- **`virtual_`** -- Virtual instructions (advice, arithmetic, assert, bitwise, byte, division, extension, shift, xor-rotate). +- **`tables`** -- Lookup table implementations with prefix/suffix sparse-dense decomposition. +- **`opcodes`** -- Opcode constants and encoding. + +## Dependency Position + +`jolt-instructions` depends only on `jolt-field` and `serde`. It is used by `jolt-host` and `jolt-zkvm`. + +## Feature Flags + +This crate has no feature flags. 
+ +## License + +MIT OR Apache-2.0 diff --git a/crates/jolt-instructions/REVIEW.md b/crates/jolt-instructions/REVIEW.md new file mode 100644 index 000000000..18fbc3fb4 --- /dev/null +++ b/crates/jolt-instructions/REVIEW.md @@ -0,0 +1,217 @@ +# jolt-instructions Review + +**Crate:** jolt-instructions (Level 1) +**LOC:** 10,280 (was 10,672 — reduced ~4% via macro-based dispatch deduplication) +**Baseline:** 0 clippy warnings, 260 tests passing, 2 doc tests (ignored) + +## Overview + +RISC-V instruction definitions and lookup table decompositions for the Jolt zkVM. +Provides the `Instruction` trait (execution semantics + flags), `LookupTable` trait +(MLE evaluation + prefix/suffix decomposition), 105 concrete instructions (RV64IMAC + +virtual), 41 lookup tables, and the `JoltInstructionSet` registry. Used by 2 downstream +crates (jolt-host, jolt-zkvm). + +**Verdict:** Well-structured crate with excellent test coverage (260 tests including +exhaustive MLE verification). The `define_instruction!` macro is clean and the +prefix/suffix decomposition is well-tested. A few maintenance hazards from manually +synchronized constants and duplicated enum variant lists. + +--- + +## Findings + +### [CD-1.1] LookupTableKind::COUNT is 40 but there are 41 variants + +**File:** `src/tables/mod.rs:180` +**Severity:** MEDIUM +**Finding:** `pub const COUNT: usize = 40;` but the `LookupTableKind` enum has 41 variants +(0-indexed: `RangeCheck` = 0 through `VirtualXORROTW7` = 40). This means `COUNT` is +off-by-one. Currently unused outside the crate, but any downstream code using it for +array sizing would silently drop the last variant. + +**Status:** [x] RESOLVED — Changed to `COUNT: usize = 41`, added compile-time const assertion. 
+ +--- + +### [CQ-1.1] No compile-time assertions for enum-vs-constant sync + +**File:** `src/tables/mod.rs`, `src/tables/prefixes/mod.rs`, `src/flags.rs` +**Severity:** MEDIUM +**Finding:** Three manually maintained constants must match their enum variant counts: +- `LookupTableKind::COUNT` (wrong — see CD-1.1) +- `NUM_PREFIXES = 46` (correct today) +- `NUM_CIRCUIT_FLAGS = 14`, `NUM_INSTRUCTION_FLAGS = 7` (correct today) + +The flags module has tests (`circuit_flags_count_matches_enum`, +`instruction_flags_count_matches_enum`) that verify last-variant + 1 == count. +But `LookupTableKind::COUNT` and `NUM_PREFIXES` have no such tests. + +**Status:** [x] RESOLVED — Added `const _: ()` assertions for both `LookupTableKind::COUNT` and `NUM_PREFIXES` (compile-time, not just tests). + +--- + +### [CQ-1.2] `unsafe transmute` for Prefixes iteration + +**File:** `src/tables/test_utils.rs:130`, `src/tables/prefixes/mod.rs` +**Severity:** LOW +**Finding:** Two call sites use `unsafe { std::mem::transmute(i as u8) }` to iterate +over `Prefixes` variants. This is UB if `NUM_PREFIXES` doesn't match the actual +variant count (the transmute creates an invalid enum discriminant). + +**Status:** [x] RESOLVED — Added `pub const ALL_PREFIXES: [Prefixes; NUM_PREFIXES]` array. Replaced both transmute sites with safe `ALL_PREFIXES[index]` indexing. + +--- + +### [CQ-2.1] LookupBits::PartialEq ignores len + +**File:** `src/lookup_bits.rs:168-173` +**Severity:** LOW +**Finding:** `PartialEq` compares only `self.as_u128() == other.as_u128()`, ignoring +the `len` field. Two bitvectors with different logical lengths but the same raw bits +(e.g., `LookupBits::new(5, 4)` vs `LookupBits::new(5, 8)`) would compare as equal. + +In practice this doesn't cause bugs because `new()` masks excess bits and callers +always use the same `len` when comparing. But it's surprising semantics. + +**Status:** [ ] PASS — not worth changing behavior; masking in `new()` prevents real issues. 
+ +--- + +### [CQ-3.1] Five sync points when adding a new lookup table + +**File:** `src/tables/mod.rs` +**Severity:** LOW +**Finding:** Adding a new lookup table requires coordinated changes in 5 places within +`tables/mod.rs` alone: (1) `LookupTableKind` enum, (2) `LookupTables` enum, +(3) `dispatch_table!` macro arms, (4) `LookupTables::kind()` match, +(5) `From` match, plus updating `COUNT`. + +**Status:** [x] RESOLVED — Reduced to 3 sync points via `kind_table_identity!` macro that auto-generates the `kind()`, `From`, and `From` impls from a single variant list. + +--- + +### [CQ-3.2] Duplicated prefix dispatch boilerplate + +**File:** `src/tables/prefixes/mod.rs` +**Severity:** LOW +**Finding:** `prefix_mle()` and `update_prefix_checkpoint()` each had ~40 lines of +identical `use` imports and ~46-arm match blocks dispatching to concrete prefix types. +The two methods duplicated ~350 lines of near-identical boilerplate. + +**Status:** [x] RESOLVED — Introduced `dispatch_prefix!` macro that encodes the variant-to-type mapping once. Both methods now delegate via a one-line macro call. Reduced `prefixes/mod.rs` from 690 to 350 lines (-49%). + +--- + +### [CQ-4.1] Manual Instruction impls duplicate macro-generated boilerplate + +**File:** `src/rv/arithmetic.rs` (MulH, MulHSU, MulHU, Div, DivU, Rem, RemU), +`src/rv/arithmetic_w.rs` (DivW, DivUW, RemW, RemUW) +**Severity:** LOW +**Finding:** 11 instructions are implemented manually (full struct + trait impls) +instead of using the `define_instruction!` macro. The macro doesn't support multi-line +execute bodies with `let` bindings, so these complex instructions opt out. + +The manual impls are correct and consistent (same derives, same `#[inline]` placement). +Extending the macro to support block-expression bodies would reduce ~350 lines but +isn't clearly worth the macro complexity. + +**Status:** [ ] PASS — manual impls are correct and consistent. 
+
+---
+
+### [CD-6.1] Dispatch table duplication between jolt-host and jolt-zkvm
+
+**File:** `jolt-host/src/cycle_row_impl.rs`, `jolt-zkvm/src/witness/flags.rs`
+**Severity:** LOW (architecture, not this crate's fault)
+**Finding:** Both downstream crates contain ~270-line match statements mapping
+`tracer::Instruction` variants to `jolt-instructions` flag arrays. These are
+mechanically identical and acknowledged as absorbed copies (comment: "ISA dispatch
+tables absorbed from jolt-zkvm/src/witness/flags.rs").
+
+The duplication exists because `jolt-instructions` correctly doesn't depend on
+`tracer`, so it can't provide the mapping. This is proper layering. The CycleRow
+trait (planned in `jolt-host/PLAN.md`) will consolidate this.
+
+**Status:** [ ] PASS — proper layering; CycleRow plan addresses this.
+
+---
+
+### [CQ-5.1] JoltInstructionSet uses `Vec<Box<dyn Instruction>>`
+
+**File:** `src/instruction_set.rs:14-16`
+**Severity:** LOW
+**Finding:** The registry heap-allocates 105 `Box<dyn Instruction>` values for zero-sized
+unit structs. A flat array of function pointers or an enum dispatch would avoid the
+allocations and virtual dispatch overhead.
+
+However, `JoltInstructionSet::new()` is called once at startup, not in hot paths.
+The dynamic dispatch through `instruction()` is also cold-path (used for tests and
+debugging, not the prover's inner loop). The proving system uses direct struct
+instantiation, not the registry.
+
+**Status:** [ ] PASS — cold-path only, not worth optimizing.
+
+---
+
+### [CQ-6.1] Shift instructions don't set lookup table
+
+**File:** `src/rv/shift.rs`
+**Severity:** PASS
+**Finding:** `Sll`, `SllI`, `Srl`, `SrlI`, `Sra`, `SraI` have `table: None` (no
+`table:` clause in the macro). This is correct — these are decomposed into virtual
+shift sequences (`VirtualSrl`, `VirtualSra`, etc.) which DO have lookup tables.
+The real shift instructions don't directly participate in lookup-based proving.
+ +**Status:** [x] PASS + +--- + +### [CD-2.1] `is_multiple_of` uses nightly-only API + +**File:** `src/virtual_/assert.rs:63,71` +**Severity:** LOW +**Finding:** `x.is_multiple_of(4)` and `x.is_multiple_of(2)` use +`u64::is_multiple_of()` which was stabilized in Rust 1.85. If the MSRV is below 1.85, +this would fail to compile. Given the crate already compiles and the workspace likely +targets recent nightly, this is fine. + +**Status:** [ ] PASS — compiles on the target toolchain. + +--- + +### [CQ-7.1] Doc examples use `ignore` attribute + +**File:** `src/tables/mod.rs:229-232` +**Severity:** LOW +**Finding:** The `LookupTables` doc example uses `ignore`: +```rust +/// ```ignore +/// let table = LookupTables::<64>::from(LookupTableKind::And); +/// ``` +``` +This means the example is never compiled or tested. It could use `no_run` instead +if the issue is runtime requirements, or be made into a compilable example. + +**Status:** [ ] PASS — minor. + +--- + +## Summary + +| Severity | Count | Resolved | Pass/WontFix | +|----------|-------|----------|-------------| +| HIGH | 0 | 0 | 0 | +| MEDIUM | 2 | 2 | 0 | +| LOW | 10 | 3 | 7 | +| **Total** | **12** | **5** | **7** | + +**Final state:** 0 clippy warnings, 260 tests passing. + +### Changes made: + +1. **Fixed LookupTableKind::COUNT** — changed 40 → 41 +2. **Added compile-time const assertions** — `LookupTableKind::COUNT` and `NUM_PREFIXES` now verified at compile time +3. **Eliminated unsafe transmute** — replaced with `ALL_PREFIXES` const array (exported from crate root) +4. **`dispatch_prefix!` macro** — eliminated ~340 lines of duplicated import+match boilerplate in `prefixes/mod.rs` +5. 
**`kind_table_identity!` macro** — eliminated ~80 lines of duplicated identity mapping in `tables/mod.rs` diff --git a/crates/jolt-instructions/src/challenge_ops.rs b/crates/jolt-instructions/src/challenge_ops.rs new file mode 100644 index 000000000..6280c8535 --- /dev/null +++ b/crates/jolt-instructions/src/challenge_ops.rs @@ -0,0 +1,73 @@ +//! Convenience trait bounds for challenge-field arithmetic in prefix/suffix evaluation. +//! +//! During the sumcheck protocol, prefix MLEs are evaluated using challenge values +//! drawn from the Fiat-Shamir transcript. These traits capture the arithmetic bounds +//! needed for prefix/suffix MLE computation. +//! +//! Since challenges are now just field elements (`C = F`), these traits are trivially +//! satisfied by any `F: Field`. They remain as named bounds for readability at use sites. + +use jolt_field::Field; +use std::ops::{Add, Mul, Sub}; + +/// A challenge value that can do arithmetic with field elements and other challenges. +/// +/// The key property is that all arithmetic produces field elements `F`, even +/// operations between two challenges (`C * C -> F`). +pub trait ChallengeOps: + Copy + + Send + + Sync + + Into + + Add + + for<'a> Add<&'a F, Output = F> + + Sub + + for<'a> Sub<&'a F, Output = F> + + Mul + + for<'a> Mul<&'a F, Output = F> + + Add + + Sub + + Mul +{ +} + +impl ChallengeOps for C where + C: Copy + + Send + + Sync + + Into + + Add + + for<'a> Add<&'a F, Output = F> + + Sub + + for<'a> Sub<&'a F, Output = F> + + Mul + + for<'a> Mul<&'a F, Output = F> + + Add + + Sub + + Mul +{ +} + +/// A field element that accepts arithmetic with a challenge type `C`. +/// +/// Enables expressions like `F::from_u64(n) * challenge` where the field element +/// is on the left-hand side. 
+pub trait FieldOps: + Add + + for<'a> Add<&'a C, Output = Self> + + Sub + + for<'a> Sub<&'a C, Output = Self> + + Mul + + for<'a> Mul<&'a C, Output = Self> +{ +} + +impl FieldOps for F where + F: Add + + for<'a> Add<&'a C, Output = F> + + Sub + + for<'a> Sub<&'a C, Output = F> + + Mul + + for<'a> Mul<&'a C, Output = F> +{ +} diff --git a/crates/jolt-instructions/src/flags.rs b/crates/jolt-instructions/src/flags.rs new file mode 100644 index 000000000..34f2269b3 --- /dev/null +++ b/crates/jolt-instructions/src/flags.rs @@ -0,0 +1,181 @@ +//! Boolean flags controlling instruction behavior in R1CS constraints and witness generation. +//! +//! [`CircuitFlags`] are embedded in Jolt's R1CS constraints (the "opflags" from the Jolt paper). +//! [`InstructionFlags`] control witness generation and operand routing but are not +//! directly constrained. +//! +//! Every instruction implements the [`Flags`] trait, returning its static flag +//! configuration. The arrays support ergonomic indexing by enum variant. + +use std::ops::{Index, IndexMut}; + +/// Boolean flags used in Jolt's R1CS constraints (`opflags` in the Jolt paper). +/// +/// Note: the flags below deviate somewhat from those described in Appendix A.1 +/// of the Jolt paper. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[repr(u8)] +pub enum CircuitFlags { + /// First lookup operand is the sum of the two instruction operands. + AddOperands, + /// First lookup operand is the difference of the two instruction operands. + SubtractOperands, + /// First lookup operand is the product of the two instruction operands. + MultiplyOperands, + /// Instruction is a load (e.g. `LW`). + Load, + /// Instruction is a store (e.g. `SW`). + Store, + /// Instruction is a jump (e.g. `JAL`, `JALR`). + Jump, + /// Lookup output is stored in `rd` at the end of the step. + WriteLookupOutputToRD, + /// Instruction is "virtual" (Section 6.1 of the Jolt paper). 
+ VirtualInstruction, + /// Instruction is an assert (Section 6.1.1 of the Jolt paper). + Assert, + /// PC unchanged during inline virtual sequences. + DoNotUpdateUnexpandedPC, + /// Is a (virtual) advice instruction. + Advice, + /// Is a compressed instruction (UnexpandedPc += 2 instead of 4). + IsCompressed, + /// First instruction in a virtual sequence. + IsFirstInSequence, + /// Last instruction in a virtual sequence. + IsLastInSequence, +} + +/// Number of circuit flags. +pub const NUM_CIRCUIT_FLAGS: usize = 14; + +/// Boolean flags that are NOT part of Jolt's R1CS constraints. +/// +/// These control witness generation, operand routing, and auxiliary prover logic. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[repr(u8)] +pub enum InstructionFlags { + /// First instruction operand is the program counter. + LeftOperandIsPC, + /// Second instruction operand is an immediate value. + RightOperandIsImm, + /// First instruction operand is RS1 register value. + LeftOperandIsRs1Value, + /// Second instruction operand is RS2 register value. + RightOperandIsRs2Value, + /// Instruction is a branch (e.g. `BEQ`, `BNE`). + Branch, + /// No-op instruction. + IsNoop, + /// Destination register index is nonzero. + IsRdNotZero, +} + +/// Number of instruction flags. +pub const NUM_INSTRUCTION_FLAGS: usize = 7; + +/// Static flag configuration for an instruction. +/// +/// Every instruction struct implements this trait to declare which circuit +/// and instruction flags are set. The returned arrays are indexed by +/// [`CircuitFlags`] and [`InstructionFlags`] variants respectively. +pub trait Flags { + /// Returns the R1CS-relevant circuit flags for this instruction. + fn circuit_flags(&self) -> [bool; NUM_CIRCUIT_FLAGS]; + + /// Returns the non-R1CS instruction flags for this instruction. + fn instruction_flags(&self) -> [bool; NUM_INSTRUCTION_FLAGS]; +} + +/// Checks whether an instruction uses interleaved-bit operand encoding. 
+/// +/// Instructions that combine operands (ADD, SUB, MUL) or use advice +/// set explicit operand-combination flags; all others use the default +/// interleaved-bit layout for lookup indices. +pub trait InterleavedBitsMarker { + /// Returns `true` if neither `AddOperands`, `SubtractOperands`, + /// `MultiplyOperands`, nor `Advice` is set. + fn is_interleaved_operands(&self) -> bool; +} + +impl InterleavedBitsMarker for [bool; NUM_CIRCUIT_FLAGS] { + #[inline] + fn is_interleaved_operands(&self) -> bool { + !self[CircuitFlags::AddOperands] + && !self[CircuitFlags::SubtractOperands] + && !self[CircuitFlags::MultiplyOperands] + && !self[CircuitFlags::Advice] + } +} + +impl Index for [bool; NUM_CIRCUIT_FLAGS] { + type Output = bool; + #[inline] + fn index(&self, index: CircuitFlags) -> &bool { + &self[index as usize] + } +} + +impl IndexMut for [bool; NUM_CIRCUIT_FLAGS] { + #[inline] + fn index_mut(&mut self, index: CircuitFlags) -> &mut bool { + &mut self[index as usize] + } +} + +impl Index for [bool; NUM_INSTRUCTION_FLAGS] { + type Output = bool; + #[inline] + fn index(&self, index: InstructionFlags) -> &bool { + &self[index as usize] + } +} + +impl IndexMut for [bool; NUM_INSTRUCTION_FLAGS] { + #[inline] + fn index_mut(&mut self, index: InstructionFlags) -> &mut bool { + &mut self[index as usize] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn circuit_flags_count_matches_enum() { + assert_eq!( + CircuitFlags::IsLastInSequence as usize + 1, + NUM_CIRCUIT_FLAGS + ); + } + + #[test] + fn instruction_flags_count_matches_enum() { + assert_eq!( + InstructionFlags::IsRdNotZero as usize + 1, + NUM_INSTRUCTION_FLAGS + ); + } + + #[test] + fn indexing_by_variant() { + let mut flags = [false; NUM_CIRCUIT_FLAGS]; + flags[CircuitFlags::Load] = true; + assert!(flags[CircuitFlags::Load]); + assert!(!flags[CircuitFlags::Store]); + } + + #[test] + fn interleaved_default() { + let flags = [false; NUM_CIRCUIT_FLAGS]; + assert!(flags.is_interleaved_operands()); 
+ } + + #[test] + fn add_operands_not_interleaved() { + let mut flags = [false; NUM_CIRCUIT_FLAGS]; + flags[CircuitFlags::AddOperands] = true; + assert!(!flags.is_interleaved_operands()); + } +} diff --git a/crates/jolt-instructions/src/instruction_set.rs b/crates/jolt-instructions/src/instruction_set.rs new file mode 100644 index 000000000..73f78a01c --- /dev/null +++ b/crates/jolt-instructions/src/instruction_set.rs @@ -0,0 +1,255 @@ +//! The complete Jolt instruction set registry. +//! +//! [`JoltInstructionSet`] collects all instruction implementations into an +//! array-indexed registry for O(1) opcode dispatch. + +use crate::opcodes; +use crate::traits::Instruction; + +/// Registry of all Jolt instructions, indexed by opcode for fast dispatch. +/// +/// Instructions are stored in a flat array where the index equals the opcode, +/// enabling O(1) lookup without hashing. +#[derive(Default)] +pub struct JoltInstructionSet { + instructions: Vec>, +} + +impl JoltInstructionSet { + /// Creates a new instruction set with all RV64IMAC and virtual instructions registered. 
+ pub fn new() -> Self { + use crate::rv::arithmetic::*; + use crate::rv::arithmetic_w::*; + use crate::rv::branch::*; + use crate::rv::compare::*; + use crate::rv::jump::*; + use crate::rv::load::*; + use crate::rv::logic::*; + use crate::rv::shift::*; + use crate::rv::shift_w::*; + use crate::rv::store::*; + use crate::rv::system::*; + use crate::virtual_::advice::*; + use crate::virtual_::arithmetic::*; + use crate::virtual_::assert::*; + use crate::virtual_::bitwise::*; + use crate::virtual_::byte::*; + use crate::virtual_::division::*; + use crate::virtual_::extension::*; + use crate::virtual_::shift::*; + use crate::virtual_::xor_rotate::*; + + let all: Vec> = vec![ + // RV64I arithmetic (0-3) + Box::new(Add), + Box::new(Sub), + Box::new(Lui), + Box::new(Auipc), + // RV64M multiply/divide (4-11) + Box::new(Mul), + Box::new(MulH), + Box::new(MulHSU), + Box::new(MulHU), + Box::new(Div), + Box::new(DivU), + Box::new(Rem), + Box::new(RemU), + // RV64I arithmetic W-suffix (12-13) + Box::new(AddW), + Box::new(SubW), + // RV64M W-suffix (14-18) + Box::new(MulW), + Box::new(DivW), + Box::new(DivUW), + Box::new(RemW), + Box::new(RemUW), + // RV64I logic (19-24) + Box::new(And), + Box::new(Or), + Box::new(Xor), + Box::new(AndI), + Box::new(OrI), + Box::new(XorI), + // RV64I shifts (25-30) + Box::new(Sll), + Box::new(Srl), + Box::new(Sra), + Box::new(SllI), + Box::new(SrlI), + Box::new(SraI), + // RV64I shifts W-suffix (31-36) + Box::new(SllW), + Box::new(SrlW), + Box::new(SraW), + Box::new(SllIW), + Box::new(SrlIW), + Box::new(SraIW), + // RV64I compare (37-40) + Box::new(Slt), + Box::new(SltU), + Box::new(SltI), + Box::new(SltIU), + // RV64I branch (41-46) + Box::new(Beq), + Box::new(Bne), + Box::new(Blt), + Box::new(Bge), + Box::new(BltU), + Box::new(BgeU), + // RV64I load (47-53) + Box::new(Lb), + Box::new(Lbu), + Box::new(Lh), + Box::new(Lhu), + Box::new(Lw), + Box::new(Lwu), + Box::new(Ld), + // RV64I store (54-57) + Box::new(Sb), + Box::new(Sh), + Box::new(Sw), + 
Box::new(Sd), + // RV64I system (58-61) + Box::new(Ecall), + Box::new(Ebreak), + Box::new(Fence), + Box::new(Noop), + // RV64I immediate aliases (62-63) + Box::new(Addi), + Box::new(AddiW), + // RV64I jump (64-65) + Box::new(Jal), + Box::new(Jalr), + // Zbb extension (66) + Box::new(Andn), + // Virtual arithmetic (67-74) + Box::new(AssertEq), + Box::new(AssertLte), + Box::new(Pow2), + Box::new(MovSign), + Box::new(Pow2I), + Box::new(Pow2W), + Box::new(Pow2IW), + Box::new(MulI), + // Virtual assert (75-79) + Box::new(AssertValidDiv0), + Box::new(AssertValidUnsignedRemainder), + Box::new(AssertMulUNoOverflow), + Box::new(AssertWordAlignment), + Box::new(AssertHalfwordAlignment), + // Virtual shift (80-87) + Box::new(VirtualSrl), + Box::new(VirtualSrli), + Box::new(VirtualSra), + Box::new(VirtualSrai), + Box::new(VirtualShiftRightBitmask), + Box::new(VirtualShiftRightBitmaski), + Box::new(VirtualRotri), + Box::new(VirtualRotriw), + // Virtual division (88-89) + Box::new(VirtualChangeDivisor), + Box::new(VirtualChangeDivisorW), + // Virtual extension (90-91) + Box::new(VirtualSignExtendWord), + Box::new(VirtualZeroExtendWord), + // Virtual XOR-rotate (92-99) + Box::new(VirtualXorRot32), + Box::new(VirtualXorRot24), + Box::new(VirtualXorRot16), + Box::new(VirtualXorRot63), + Box::new(VirtualXorRotW16), + Box::new(VirtualXorRotW12), + Box::new(VirtualXorRotW8), + Box::new(VirtualXorRotW7), + // Virtual byte (100) + Box::new(VirtualRev8W), + // Virtual advice/IO (101-104) + Box::new(VirtualAdvice), + Box::new(VirtualAdviceLen), + Box::new(VirtualAdviceLoad), + Box::new(VirtualHostIO), + ]; + + debug_assert_eq!(all.len(), opcodes::COUNT as usize); + let mut sorted: Vec<(u32, Box)> = + all.into_iter().map(|i| (i.opcode(), i)).collect(); + sorted.sort_by_key(|(op, _)| *op); + for (i, (op, _)) in sorted.iter().enumerate() { + debug_assert_eq!(*op as usize, i, "expected opcode {i} but got {op}"); + } + let instructions = sorted.into_iter().map(|(_, i)| i).collect(); + + Self { 
instructions } + } + + /// Look up an instruction by opcode. Returns `None` if the opcode is out of range. + #[inline] + pub fn instruction(&self, opcode: u32) -> Option<&dyn Instruction> { + self.instructions.get(opcode as usize).map(|b| b.as_ref()) + } + + /// Iterate over all registered instructions in opcode order. + #[inline] + pub fn iter(&self) -> impl Iterator { + self.instructions.iter().map(|b| b.as_ref()) + } + + /// Total number of registered instructions. + #[inline] + pub fn len(&self) -> usize { + self.instructions.len() + } + + /// Returns `true` if no instructions are registered. + #[inline] + pub fn is_empty(&self) -> bool { + self.instructions.is_empty() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn all_opcodes_covered() { + let set = JoltInstructionSet::new(); + assert_eq!(set.len(), opcodes::COUNT as usize); + + for (i, instr) in set.iter().enumerate() { + assert_eq!( + instr.opcode() as usize, + i, + "instruction {} has opcode {} but is at index {}", + instr.name(), + instr.opcode(), + i + ); + } + } + + #[test] + fn lookup_by_opcode() { + let set = JoltInstructionSet::new(); + let add = set.instruction(opcodes::ADD).unwrap(); + assert_eq!(add.name(), "ADD"); + assert_eq!(add.execute(3, 5), 8); + } + + #[test] + fn out_of_range_returns_none() { + let set = JoltInstructionSet::new(); + assert!(set.instruction(opcodes::COUNT).is_none()); + assert!(set.instruction(u32::MAX).is_none()); + } + + #[test] + fn unique_names() { + let set = JoltInstructionSet::new(); + let mut names: Vec<&str> = set.iter().map(|i| i.name()).collect(); + names.sort_unstable(); + let before = names.len(); + names.dedup(); + assert_eq!(before, names.len(), "duplicate instruction names found"); + } +} diff --git a/crates/jolt-instructions/src/interleave.rs b/crates/jolt-instructions/src/interleave.rs new file mode 100644 index 000000000..f4ec049f1 --- /dev/null +++ b/crates/jolt-instructions/src/interleave.rs @@ -0,0 +1,113 @@ +//! 
Bit interleaving utilities for two-operand lookup table indexing.
+//!
+//! In the Twist/Shout lookup argument, two XLEN-bit operands are combined
+//! into a single `2*XLEN`-bit index by interleaving their bits. The first
+//! operand occupies even positions and the second occupies odd positions.
+
+/// Spreads the 64 bits of `v` so that input bit `i` lands at output bit
+/// position `2*i`, with zero bits in between (the classic Morton-code
+/// "spread" step). Shared by [`interleave_bits`] for both operands.
+#[inline]
+fn spread_bits(v: u64) -> u128 {
+    let mut b = v as u128;
+    b = (b | (b << 32)) & 0x0000_0000_FFFF_FFFF_0000_0000_FFFF_FFFF;
+    b = (b | (b << 16)) & 0x0000_FFFF_0000_FFFF_0000_FFFF_0000_FFFF;
+    b = (b | (b << 8)) & 0x00FF_00FF_00FF_00FF_00FF_00FF_00FF_00FF;
+    b = (b | (b << 4)) & 0x0F0F_0F0F_0F0F_0F0F_0F0F_0F0F_0F0F_0F0F;
+    b = (b | (b << 2)) & 0x3333_3333_3333_3333_3333_3333_3333_3333;
+    b = (b | (b << 1)) & 0x5555_5555_5555_5555_5555_5555_5555_5555;
+    b
+}
+
+/// Interleaves bits from two 64-bit operands into a 128-bit lookup index.
+///
+/// Bit `i` of `x` is placed at position `2*i + 1` (even indices from MSB perspective),
+/// and bit `i` of `y` is placed at position `2*i` (odd indices).
+///
+/// This matches the convention in the Jolt paper where the combined index
+/// has `x` bits at even positions and `y` bits at odd positions (MSB-first).
+#[inline]
+pub fn interleave_bits(x: u64, y: u64) -> u128 {
+    // Spread each operand onto even bit positions, then shift `x` onto the odd
+    // positions and merge. The two spread sequences were previously duplicated
+    // inline; factoring them out keeps the magic masks in one place.
+    (spread_bits(x) << 1) | spread_bits(y)
+}
+
+/// Recovers two 64-bit operands from an interleaved 128-bit lookup index.
+/// +/// Inverse of [`interleave_bits`]: extracts even-position bits into `x` +/// and odd-position bits into `y`. +#[inline] +pub fn uninterleave_bits(val: u128) -> (u64, u64) { + let mut x_bits = (val >> 1) & 0x5555_5555_5555_5555_5555_5555_5555_5555; + let mut y_bits = val & 0x5555_5555_5555_5555_5555_5555_5555_5555; + + x_bits = (x_bits | (x_bits >> 1)) & 0x3333_3333_3333_3333_3333_3333_3333_3333; + x_bits = (x_bits | (x_bits >> 2)) & 0x0F0F_0F0F_0F0F_0F0F_0F0F_0F0F_0F0F_0F0F; + x_bits = (x_bits | (x_bits >> 4)) & 0x00FF_00FF_00FF_00FF_00FF_00FF_00FF_00FF; + x_bits = (x_bits | (x_bits >> 8)) & 0x0000_FFFF_0000_FFFF_0000_FFFF_0000_FFFF; + x_bits = (x_bits | (x_bits >> 16)) & 0x0000_0000_FFFF_FFFF_0000_0000_FFFF_FFFF; + x_bits = (x_bits | (x_bits >> 32)) & 0x0000_0000_0000_0000_FFFF_FFFF_FFFF_FFFF; + + y_bits = (y_bits | (y_bits >> 1)) & 0x3333_3333_3333_3333_3333_3333_3333_3333; + y_bits = (y_bits | (y_bits >> 2)) & 0x0F0F_0F0F_0F0F_0F0F_0F0F_0F0F_0F0F_0F0F; + y_bits = (y_bits | (y_bits >> 4)) & 0x00FF_00FF_00FF_00FF_00FF_00FF_00FF_00FF; + y_bits = (y_bits | (y_bits >> 8)) & 0x0000_FFFF_0000_FFFF_0000_FFFF_0000_FFFF; + y_bits = (y_bits | (y_bits >> 16)) & 0x0000_0000_FFFF_FFFF_0000_0000_FFFF_FFFF; + y_bits = (y_bits | (y_bits >> 32)) & 0x0000_0000_0000_0000_FFFF_FFFF_FFFF_FFFF; + + (x_bits as u64, y_bits as u64) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn roundtrip_small() { + let x: u64 = 0b01; + let y: u64 = 0b10; + let interleaved = interleave_bits(x, y); + // x=01 → bits at positions 1,3: 0,1 + // y=10 → bits at positions 0,2: 0,1 + // Combined (MSB first): bit3=0, bit2=1, bit1=1, bit0=0 = 0b0110 = 6 + assert_eq!(interleaved, 0b0110); + let (rx, ry) = uninterleave_bits(interleaved); + assert_eq!((rx, ry), (x, y)); + } + + #[test] + fn roundtrip_random_patterns() { + let pairs: &[(u64, u64)] = &[ + (0, 0), + (u64::MAX, u64::MAX), + (u64::MAX, 0), + (0, u64::MAX), + (0xDEAD_BEEF_CAFE_BABE, 0x1234_5678_9ABC_DEF0), + (1, 1), + 
(0x8000_0000_0000_0000, 0x8000_0000_0000_0000), + ]; + for &(x, y) in pairs { + let interleaved = interleave_bits(x, y); + let (rx, ry) = uninterleave_bits(interleaved); + assert_eq!((rx, ry), (x, y), "roundtrip failed for ({x:#x}, {y:#x})"); + } + } + + #[test] + fn uninterleave_interleave_roundtrip() { + let vals: &[u128] = &[0, 1, u128::MAX, 0xAAAA_BBBB_CCCC_DDDD_1111_2222_3333_4444]; + for &val in vals { + let (x, y) = uninterleave_bits(val); + let reinterleaved = interleave_bits(x, y); + assert_eq!(reinterleaved, val, "roundtrip failed for {val:#x}"); + } + } + + #[test] + fn single_bit_positions() { + // x=1 (bit 0 set) should appear at position 1 in the interleaved result + assert_eq!(interleave_bits(1, 0), 0b10); + // y=1 (bit 0 set) should appear at position 0 + assert_eq!(interleave_bits(0, 1), 0b01); + } +} diff --git a/crates/jolt-instructions/src/lib.rs b/crates/jolt-instructions/src/lib.rs new file mode 100644 index 000000000..3d4f2f0e8 --- /dev/null +++ b/crates/jolt-instructions/src/lib.rs @@ -0,0 +1,50 @@ +//! RISC-V instruction set definitions and lookup table decompositions +//! for the Jolt zkVM proving system. +//! +//! This crate provides: +//! +//! - The [`Instruction`] trait: execution semantics, lookup table association, and flags. +//! - The [`LookupTable`] trait: table materialization and MLE evaluation. +//! - The [`Flags`] trait with [`CircuitFlags`] and [`InstructionFlags`] enums. +//! - Concrete implementations of all RV64IMAC + virtual instructions. +//! - The [`JoltInstructionSet`] registry for opcode-indexed dispatch. +//! - Prefix/suffix sparse-dense decomposition for sub-linear MLE evaluation. +//! - Bit-interleaving utilities for two-operand lookup indices. +//! +//! # Architecture +//! +//! Each instruction is a zero-sized unit struct implementing [`Instruction`] +//! (which requires [`Flags`]). The `execute` method provides ground-truth +//! computation. The `lookup_table` method declares which [`LookupTableKind`] +//! 
the instruction decomposes into for the proving system. +//! +//! Flags are split into *static* (determined by instruction type) and *dynamic* +//! (determined per-cycle by the runtime). This crate provides static flags; +//! dynamic flags (`VirtualInstruction`, `IsCompressed`, `IsRdNotZero`, etc.) +//! are applied by `jolt-zkvm` based on trace context. + +#[macro_use] +mod macros; + +pub mod challenge_ops; +pub mod flags; +pub mod instruction_set; +pub mod interleave; +pub mod lookup_bits; +pub mod opcodes; +pub mod rv; +pub mod tables; +pub mod traits; +pub mod virtual_; + +pub use challenge_ops::{ChallengeOps, FieldOps}; +pub use flags::{ + CircuitFlags, Flags, InstructionFlags, InterleavedBitsMarker, NUM_CIRCUIT_FLAGS, + NUM_INSTRUCTION_FLAGS, +}; +pub use instruction_set::JoltInstructionSet; +pub use interleave::{interleave_bits, uninterleave_bits}; +pub use lookup_bits::LookupBits; +pub use tables::prefixes::ALL_PREFIXES; +pub use tables::{LookupTableKind, LookupTables}; +pub use traits::{Instruction, LookupTable}; diff --git a/crates/jolt-instructions/src/lookup_bits.rs b/crates/jolt-instructions/src/lookup_bits.rs new file mode 100644 index 000000000..784c46296 --- /dev/null +++ b/crates/jolt-instructions/src/lookup_bits.rs @@ -0,0 +1,247 @@ +//! Compact bitvector type for lookup table index substrings. +//! +//! During the sumcheck protocol over lookup tables, indices are decomposed +//! into prefix/suffix substrings. [`LookupBits`] represents these substrings +//! as a compact 17-byte bitvector (vs 32 bytes for `u128`), which matters +//! because millions of these are created during proving. + +use crate::uninterleave_bits; +use std::fmt::Display; +use std::ops::BitAnd; + +/// A bitvector representing a substring of a lookup index. +/// +/// Stores up to 128 bits in a packed byte array with a length tag. +/// The byte-array layout avoids the 16-byte alignment that `u128` requires, +/// reducing struct size from 32 bytes to 17. 
+#[derive(Clone, Copy, Debug)]
+pub struct LookupBits {
+    // Raw bits in little-endian byte order; only the low `len` bits are meaningful.
+    bytes: [u8; 16],
+    // Logical bit length, 0..=128.
+    len: u8,
+}
+
+impl LookupBits {
+    /// Creates a new bitvector from the low `len` bits of `bits`.
+    ///
+    /// Bits beyond position `len` are masked off.
+    pub fn new(mut bits: u128, len: usize) -> Self {
+        debug_assert!(len <= 128);
+        // Guard: `1u128 << 128` would overflow, so the full-width case skips masking.
+        if len < 128 {
+            bits %= 1 << len;
+        }
+        Self {
+            bytes: bits.to_le_bytes(),
+            len: len as u8,
+        }
+    }
+
+    /// Splits interleaved x/y bits into separate bitvectors.
+    ///
+    /// Even-position bits (from MSB perspective) become `x`,
+    /// odd-position bits become `y`.
+    pub fn uninterleave(&self) -> (Self, Self) {
+        let (x_bits, y_bits) = uninterleave_bits(u128::from_le_bytes(self.bytes));
+        // `x` gets floor(len / 2) bits and `y` gets the remainder, so for an odd
+        // overall length `y` holds one more bit than `x`.
+        let x = Self::new(x_bits as u128, (self.len / 2) as usize);
+        let y = Self::new(y_bits as u128, (self.len - x.len) as usize);
+        (x, y)
+    }
+
+    /// Splits into `(prefix, suffix)` where `suffix.len() == suffix_len`.
+    pub fn split(&self, suffix_len: usize) -> (Self, Self) {
+        let bits = u128::from_le_bytes(self.bytes);
+        // NOTE(review): assumes `suffix_len < 128` — `1u128 << 128` overflows.
+        // Confirm callers never split off a full-width suffix.
+        let suffix_bits = bits % (1 << suffix_len);
+        let suffix = Self::new(suffix_bits, suffix_len);
+        let prefix_bits = bits >> suffix_len;
+        let prefix = Self::new(prefix_bits, self.len as usize - suffix_len);
+        (prefix, suffix)
+    }
+
+    /// Pops and returns the most significant bit, decrementing `len`.
+    pub fn pop_msb(&mut self) -> u8 {
+        let mut bits = u128::from_le_bytes(self.bytes);
+        // NOTE(review): assumes `len > 0` — `self.len - 1` underflows (debug panic)
+        // on an empty bitvector; confirm callers check `is_empty()` first.
+        let msb = (bits >> (self.len - 1)) & 1;
+        bits %= 1 << (self.len - 1);
+        self.bytes = bits.to_le_bytes();
+        self.len -= 1;
+        msb as u8
+    }
+
+    /// Number of bits in this bitvector.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.len as usize
+    }
+
+    /// Returns `true` if this bitvector is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.len == 0
+    }
+
+    /// Number of trailing zero bits.
+    pub fn trailing_zeros(&self) -> u32 {
+        // Clamp to `len` so an all-zero value doesn't report more zeros than the
+        // bitvector logically contains.
+        std::cmp::min(
+            u128::from_le_bytes(self.bytes).trailing_zeros(),
+            self.len as u32,
+        )
+    }
+
+    /// Number of leading one bits.
+ pub fn leading_ones(&self) -> u32 { + u128::from_le_bytes(self.bytes) + .wrapping_shl(128 - self.len as u32) + .leading_ones() + } + + /// Returns the raw bits as `u128`. + #[inline] + fn as_u128(&self) -> u128 { + u128::from_le_bytes(self.bytes) + } +} + +impl Display for LookupBits { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:0width$b}", self.as_u128(), width = self.len as usize) + } +} + +impl From for u128 { + #[inline] + fn from(value: LookupBits) -> u128 { + value.as_u128() + } +} + +impl From for usize { + #[inline] + fn from(value: LookupBits) -> usize { + value.as_u128().try_into().unwrap() + } +} + +impl From for u32 { + #[inline] + fn from(value: LookupBits) -> u32 { + value.as_u128().try_into().unwrap() + } +} + +impl From for u64 { + #[inline] + fn from(value: LookupBits) -> u64 { + value.as_u128().try_into().unwrap() + } +} + +impl From<&LookupBits> for u128 { + #[inline] + fn from(value: &LookupBits) -> u128 { + value.as_u128() + } +} + +impl From<&LookupBits> for usize { + #[inline] + fn from(value: &LookupBits) -> usize { + value.as_u128().try_into().unwrap() + } +} + +impl From<&LookupBits> for u32 { + #[inline] + fn from(value: &LookupBits) -> u32 { + value.as_u128().try_into().unwrap() + } +} + +impl BitAnd for LookupBits { + type Output = usize; + + #[inline] + fn bitand(self, rhs: usize) -> Self::Output { + let lhs = usize::from_le_bytes(self.bytes[0..size_of::()].try_into().unwrap()); + lhs & rhs + } +} + +impl PartialEq for LookupBits { + fn eq(&self, other: &Self) -> bool { + self.as_u128() == other.as_u128() + } +} + +impl Eq for LookupBits {} + +#[cfg(test)] +mod tests { + use super::*; + use crate::interleave_bits; + + #[test] + fn new_masks_excess_bits() { + let bits = LookupBits::new(0xFF, 4); + assert_eq!(u128::from(bits), 0x0F); + assert_eq!(bits.len(), 4); + } + + #[test] + fn split_roundtrip() { + let bits = LookupBits::new(0b1101_0110, 8); + let (prefix, suffix) = bits.split(4); + 
assert_eq!(u128::from(prefix), 0b1101); + assert_eq!(u128::from(suffix), 0b0110); + assert_eq!(prefix.len(), 4); + assert_eq!(suffix.len(), 4); + } + + #[test] + fn pop_msb_sequence() { + let mut bits = LookupBits::new(0b101, 3); + assert_eq!(bits.pop_msb(), 1); + assert_eq!(bits.len(), 2); + assert_eq!(bits.pop_msb(), 0); + assert_eq!(bits.len(), 1); + assert_eq!(bits.pop_msb(), 1); + assert_eq!(bits.len(), 0); + } + + #[test] + fn uninterleave_matches_global() { + let x: u64 = 0xDEAD; + let y: u64 = 0xBEEF; + let interleaved = interleave_bits(x, y); + let bits = LookupBits::new(interleaved, 32); + let (bx, by) = bits.uninterleave(); + assert_eq!(u64::from(bx), x); + assert_eq!(u64::from(by), y); + } + + #[test] + fn trailing_zeros_and_leading_ones() { + let bits = LookupBits::new(0b1110_1000, 8); + assert_eq!(bits.trailing_zeros(), 3); + assert_eq!(bits.leading_ones(), 3); + } + + #[test] + fn bitand_usize() { + let bits = LookupBits::new(0xFF, 8); + assert_eq!(bits & 0x0F, 0x0F); + } + + #[test] + fn display_format() { + let bits = LookupBits::new(0b101, 4); + assert_eq!(format!("{bits}"), "0101"); + } + + #[test] + fn equality() { + let a = LookupBits::new(42, 8); + let b = LookupBits::new(42, 8); + let c = LookupBits::new(43, 8); + assert_eq!(a, b); + assert_ne!(a, c); + } +} diff --git a/crates/jolt-instructions/src/macros.rs b/crates/jolt-instructions/src/macros.rs new file mode 100644 index 000000000..a84c232c1 --- /dev/null +++ b/crates/jolt-instructions/src/macros.rs @@ -0,0 +1,80 @@ +//! Internal macros for concise instruction definitions. + +/// Defines a unit-struct instruction with `Instruction` and `Flags` trait implementations. +/// +/// # Syntax +/// +/// ```ignore +/// define_instruction!( +/// /// Doc comment. 
+/// Add, opcodes::ADD, "ADD", +/// |x, y| x.wrapping_add(y), +/// circuit: [AddOperands, WriteLookupOutputToRD], +/// instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], +/// table: RangeCheck, +/// ); +/// ``` +/// +/// All three trailing sections (`circuit`, `instruction`, `table`) are optional. +/// Omitting `circuit` or `instruction` produces an all-false flag array. +/// Omitting `table` makes `lookup_table()` return `None`. +macro_rules! define_instruction { + ( + $(#[$meta:meta])* + $name:ident, $opcode:expr, $label:expr, + |$x:ident, $y:ident| $body:expr + $(, circuit: [$($cflag:ident),* $(,)?])? + $(, instruction: [$($iflag:ident),* $(,)?])? + $(, table: $table:ident)? + $(,)? + ) => { + $(#[$meta])* + #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] + #[derive(serde::Serialize, serde::Deserialize)] + pub struct $name; + + impl $crate::Instruction for $name { + #[inline] + fn opcode(&self) -> u32 { + $opcode + } + + #[inline] + fn name(&self) -> &'static str { + $label + } + + #[inline] + fn execute(&self, $x: u64, $y: u64) -> u64 { + $body + } + + #[inline] + fn lookup_table(&self) -> Option<$crate::LookupTableKind> { + define_instruction!(@table $($table)?) + } + } + + impl $crate::Flags for $name { + #[inline] + fn circuit_flags(&self) -> [bool; $crate::NUM_CIRCUIT_FLAGS] { + #[allow(unused_mut)] + let mut flags = [false; $crate::NUM_CIRCUIT_FLAGS]; + $($(flags[$crate::CircuitFlags::$cflag] = true;)*)? + flags + } + + #[inline] + fn instruction_flags(&self) -> [bool; $crate::NUM_INSTRUCTION_FLAGS] { + #[allow(unused_mut)] + let mut flags = [false; $crate::NUM_INSTRUCTION_FLAGS]; + $($(flags[$crate::InstructionFlags::$iflag] = true;)*)? + flags + } + } + }; + + // Internal: resolve optional table to Some(Kind) or None. 
+ (@table $table:ident) => { Some($crate::LookupTableKind::$table) }; + (@table) => { None }; +} diff --git a/crates/jolt-instructions/src/opcodes.rs b/crates/jolt-instructions/src/opcodes.rs new file mode 100644 index 000000000..aa59cf22d --- /dev/null +++ b/crates/jolt-instructions/src/opcodes.rs @@ -0,0 +1,158 @@ +//! Sequential opcode assignments for all Jolt instructions. +//! +//! Opcodes are assigned contiguously starting from 0 to enable efficient +//! array-based dispatch in the instruction set registry. + +// RV64I arithmetic +pub const ADD: u32 = 0; +pub const SUB: u32 = 1; +pub const LUI: u32 = 2; +pub const AUIPC: u32 = 3; + +// RV64M multiply/divide +pub const MUL: u32 = 4; +pub const MULH: u32 = 5; +pub const MULHSU: u32 = 6; +pub const MULHU: u32 = 7; +pub const DIV: u32 = 8; +pub const DIVU: u32 = 9; +pub const REM: u32 = 10; +pub const REMU: u32 = 11; + +// RV64I arithmetic W-suffix (32-bit on RV64) +pub const ADDW: u32 = 12; +pub const SUBW: u32 = 13; + +// RV64M W-suffix +pub const MULW: u32 = 14; +pub const DIVW: u32 = 15; +pub const DIVUW: u32 = 16; +pub const REMW: u32 = 17; +pub const REMUW: u32 = 18; + +// RV64I logic +pub const AND: u32 = 19; +pub const OR: u32 = 20; +pub const XOR: u32 = 21; +pub const ANDI: u32 = 22; +pub const ORI: u32 = 23; +pub const XORI: u32 = 24; + +// RV64I shifts +pub const SLL: u32 = 25; +pub const SRL: u32 = 26; +pub const SRA: u32 = 27; +pub const SLLI: u32 = 28; +pub const SRLI: u32 = 29; +pub const SRAI: u32 = 30; + +// RV64I shifts W-suffix +pub const SLLW: u32 = 31; +pub const SRLW: u32 = 32; +pub const SRAW: u32 = 33; +pub const SLLIW: u32 = 34; +pub const SRLIW: u32 = 35; +pub const SRAIW: u32 = 36; + +// RV64I compare +pub const SLT: u32 = 37; +pub const SLTU: u32 = 38; +pub const SLTI: u32 = 39; +pub const SLTIU: u32 = 40; + +// RV64I branch +pub const BEQ: u32 = 41; +pub const BNE: u32 = 42; +pub const BLT: u32 = 43; +pub const BGE: u32 = 44; +pub const BLTU: u32 = 45; +pub const BGEU: u32 = 46; + +// 
RV64I load +pub const LB: u32 = 47; +pub const LBU: u32 = 48; +pub const LH: u32 = 49; +pub const LHU: u32 = 50; +pub const LW: u32 = 51; +pub const LWU: u32 = 52; +pub const LD: u32 = 53; + +// RV64I store +pub const SB: u32 = 54; +pub const SH: u32 = 55; +pub const SW: u32 = 56; +pub const SD: u32 = 57; + +// RV64I system +pub const ECALL: u32 = 58; +pub const EBREAK: u32 = 59; +pub const FENCE: u32 = 60; +pub const NOOP: u32 = 61; + +// RV64I immediate aliases +pub const ADDI: u32 = 62; +pub const ADDIW: u32 = 63; + +// RV64I jump +pub const JAL: u32 = 64; +pub const JALR: u32 = 65; + +// Zbb extension +pub const ANDN: u32 = 66; + +// Virtual arithmetic +pub const ASSERT_EQ: u32 = 67; +pub const ASSERT_LTE: u32 = 68; +pub const POW2: u32 = 69; +pub const MOVSIGN: u32 = 70; +pub const VIRTUAL_POW2I: u32 = 71; +pub const VIRTUAL_POW2W: u32 = 72; +pub const VIRTUAL_POW2IW: u32 = 73; +pub const VIRTUAL_MULI: u32 = 74; + +// Virtual assert +pub const VIRTUAL_ASSERT_VALID_DIV0: u32 = 75; +pub const VIRTUAL_ASSERT_VALID_UNSIGNED_REMAINDER: u32 = 76; +pub const VIRTUAL_ASSERT_MULU_NO_OVERFLOW: u32 = 77; +pub const VIRTUAL_ASSERT_WORD_ALIGNMENT: u32 = 78; +pub const VIRTUAL_ASSERT_HALFWORD_ALIGNMENT: u32 = 79; + +// Virtual shift +pub const VIRTUAL_SRL: u32 = 80; +pub const VIRTUAL_SRLI: u32 = 81; +pub const VIRTUAL_SRA: u32 = 82; +pub const VIRTUAL_SRAI: u32 = 83; +pub const VIRTUAL_SHIFT_RIGHT_BITMASK: u32 = 84; +pub const VIRTUAL_SHIFT_RIGHT_BITMASKI: u32 = 85; +pub const VIRTUAL_ROTRI: u32 = 86; +pub const VIRTUAL_ROTRIW: u32 = 87; + +// Virtual division +pub const VIRTUAL_CHANGE_DIVISOR: u32 = 88; +pub const VIRTUAL_CHANGE_DIVISOR_W: u32 = 89; + +// Virtual extension +pub const VIRTUAL_SIGN_EXTEND_WORD: u32 = 90; +pub const VIRTUAL_ZERO_EXTEND_WORD: u32 = 91; + +// Virtual XOR-rotate (SHA) +pub const VIRTUAL_XORROT32: u32 = 92; +pub const VIRTUAL_XORROT24: u32 = 93; +pub const VIRTUAL_XORROT16: u32 = 94; +pub const VIRTUAL_XORROT63: u32 = 95; +pub const 
VIRTUAL_XORROTW16: u32 = 96; +pub const VIRTUAL_XORROTW12: u32 = 97; +pub const VIRTUAL_XORROTW8: u32 = 98; +pub const VIRTUAL_XORROTW7: u32 = 99; + +// Virtual byte manipulation +pub const VIRTUAL_REV8W: u32 = 100; + +// Virtual advice/IO +pub const VIRTUAL_ADVICE: u32 = 101; +pub const VIRTUAL_ADVICE_LEN: u32 = 102; +pub const VIRTUAL_ADVICE_LOAD: u32 = 103; +pub const VIRTUAL_HOST_IO: u32 = 104; + +/// Total number of opcodes in the instruction set. +pub const COUNT: u32 = 105; diff --git a/crates/jolt-instructions/src/rv/arithmetic.rs b/crates/jolt-instructions/src/rv/arithmetic.rs new file mode 100644 index 000000000..5fa2c049e --- /dev/null +++ b/crates/jolt-instructions/src/rv/arithmetic.rs @@ -0,0 +1,461 @@ +//! RV64I/M arithmetic instructions: ADD, SUB, LUI, AUIPC, and +//! the M-extension multiply/divide family. + +use crate::opcodes; + +define_instruction!( + /// RV64I ADD: `rd = rs1 + rs2` (wrapping). + Add, opcodes::ADD, "ADD", + |x, y| x.wrapping_add(y), + circuit: [AddOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: RangeCheck, +); + +define_instruction!( + /// RV64I ADDI: `rd = rs1 + imm` (wrapping). Immediate already decoded. + Addi, opcodes::ADDI, "ADDI", + |x, y| x.wrapping_add(y), + circuit: [AddOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: RangeCheck, +); + +define_instruction!( + /// RV64I SUB: `rd = rs1 - rs2` (wrapping). + Sub, opcodes::SUB, "SUB", + |x, y| x.wrapping_sub(y), + circuit: [SubtractOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: RangeCheck, +); + +define_instruction!( + /// RV64I LUI: load upper immediate. Result is the immediate value itself. 
+ Lui, opcodes::LUI, "LUI", + |x, _y| x, + circuit: [AddOperands, WriteLookupOutputToRD], + instruction: [RightOperandIsImm], + table: RangeCheck, +); + +define_instruction!( + /// RV64I AUIPC: add upper immediate to PC. `rd = PC + imm`. + Auipc, opcodes::AUIPC, "AUIPC", + |x, y| x.wrapping_add(y), + circuit: [AddOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsPC, RightOperandIsImm], + table: RangeCheck, +); + +define_instruction!( + /// RV64M MUL: signed multiply, lower 64 bits of the 128-bit product. + Mul, opcodes::MUL, "MUL", + |x, y| x.wrapping_mul(y), + circuit: [MultiplyOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: RangeCheck, +); + +/// RV64M MULH: signed×signed multiply, upper 64 bits. +#[derive( + Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, +)] +pub struct MulH; + +impl crate::Instruction for MulH { + #[inline] + fn opcode(&self) -> u32 { + opcodes::MULH + } + #[inline] + fn name(&self) -> &'static str { + "MULH" + } + #[inline] + fn execute(&self, x: u64, y: u64) -> u64 { + let product = (x as i64 as i128).wrapping_mul(y as i64 as i128); + (product >> 64) as u64 + } + #[inline] + fn lookup_table(&self) -> Option { + None + } +} + +impl crate::Flags for MulH { + #[inline] + fn circuit_flags(&self) -> [bool; crate::NUM_CIRCUIT_FLAGS] { + [false; crate::NUM_CIRCUIT_FLAGS] + } + #[inline] + fn instruction_flags(&self) -> [bool; crate::NUM_INSTRUCTION_FLAGS] { + let mut flags = [false; crate::NUM_INSTRUCTION_FLAGS]; + flags[crate::InstructionFlags::LeftOperandIsRs1Value] = true; + flags[crate::InstructionFlags::RightOperandIsRs2Value] = true; + flags + } +} + +/// RV64M MULHSU: signed×unsigned multiply, upper 64 bits. 
+#[derive( + Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, +)] +pub struct MulHSU; + +impl crate::Instruction for MulHSU { + #[inline] + fn opcode(&self) -> u32 { + opcodes::MULHSU + } + #[inline] + fn name(&self) -> &'static str { + "MULHSU" + } + #[inline] + fn execute(&self, x: u64, y: u64) -> u64 { + let product = (x as i64 as i128).wrapping_mul(y as u128 as i128); + (product >> 64) as u64 + } + #[inline] + fn lookup_table(&self) -> Option { + None + } +} + +impl crate::Flags for MulHSU { + #[inline] + fn circuit_flags(&self) -> [bool; crate::NUM_CIRCUIT_FLAGS] { + [false; crate::NUM_CIRCUIT_FLAGS] + } + #[inline] + fn instruction_flags(&self) -> [bool; crate::NUM_INSTRUCTION_FLAGS] { + let mut flags = [false; crate::NUM_INSTRUCTION_FLAGS]; + flags[crate::InstructionFlags::LeftOperandIsRs1Value] = true; + flags[crate::InstructionFlags::RightOperandIsRs2Value] = true; + flags + } +} + +/// RV64M MULHU: unsigned×unsigned multiply, upper 64 bits. 
+#[derive( + Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, +)] +pub struct MulHU; + +impl crate::Instruction for MulHU { + #[inline] + fn opcode(&self) -> u32 { + opcodes::MULHU + } + #[inline] + fn name(&self) -> &'static str { + "MULHU" + } + #[inline] + fn execute(&self, x: u64, y: u64) -> u64 { + let product = (x as u128).wrapping_mul(y as u128); + (product >> 64) as u64 + } + #[inline] + fn lookup_table(&self) -> Option { + Some(crate::LookupTableKind::UpperWord) + } +} + +impl crate::Flags for MulHU { + #[inline] + fn circuit_flags(&self) -> [bool; crate::NUM_CIRCUIT_FLAGS] { + let mut flags = [false; crate::NUM_CIRCUIT_FLAGS]; + flags[crate::CircuitFlags::MultiplyOperands] = true; + flags[crate::CircuitFlags::WriteLookupOutputToRD] = true; + flags + } + #[inline] + fn instruction_flags(&self) -> [bool; crate::NUM_INSTRUCTION_FLAGS] { + let mut flags = [false; crate::NUM_INSTRUCTION_FLAGS]; + flags[crate::InstructionFlags::LeftOperandIsRs1Value] = true; + flags[crate::InstructionFlags::RightOperandIsRs2Value] = true; + flags + } +} + +/// RV64M DIV: signed division with RISC-V overflow handling. +/// +/// Special cases per the RISC-V spec: +/// - Division by zero returns `u64::MAX` (all bits set, i.e. -1 unsigned). +/// - `i64::MIN / -1` returns `i64::MIN` (overflow wraps). 
+#[derive( + Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, +)] +pub struct Div; + +impl crate::Instruction for Div { + #[inline] + fn opcode(&self) -> u32 { + opcodes::DIV + } + #[inline] + fn name(&self) -> &'static str { + "DIV" + } + #[inline] + fn execute(&self, x: u64, y: u64) -> u64 { + let sx = x as i64; + let sy = y as i64; + if sy == 0 { + u64::MAX + } else if sx == i64::MIN && sy == -1 { + sx as u64 + } else { + sx.wrapping_div(sy) as u64 + } + } + #[inline] + fn lookup_table(&self) -> Option { + None + } +} + +impl crate::Flags for Div { + #[inline] + fn circuit_flags(&self) -> [bool; crate::NUM_CIRCUIT_FLAGS] { + [false; crate::NUM_CIRCUIT_FLAGS] + } + #[inline] + fn instruction_flags(&self) -> [bool; crate::NUM_INSTRUCTION_FLAGS] { + let mut flags = [false; crate::NUM_INSTRUCTION_FLAGS]; + flags[crate::InstructionFlags::LeftOperandIsRs1Value] = true; + flags[crate::InstructionFlags::RightOperandIsRs2Value] = true; + flags + } +} + +/// RV64M DIVU: unsigned division. Returns `u64::MAX` on division by zero. +#[derive( + Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, +)] +pub struct DivU; + +impl crate::Instruction for DivU { + #[inline] + fn opcode(&self) -> u32 { + opcodes::DIVU + } + #[inline] + fn name(&self) -> &'static str { + "DIVU" + } + #[inline] + fn execute(&self, x: u64, y: u64) -> u64 { + if y == 0 { + u64::MAX + } else { + x / y + } + } + #[inline] + fn lookup_table(&self) -> Option { + None + } +} + +impl crate::Flags for DivU { + #[inline] + fn circuit_flags(&self) -> [bool; crate::NUM_CIRCUIT_FLAGS] { + [false; crate::NUM_CIRCUIT_FLAGS] + } + #[inline] + fn instruction_flags(&self) -> [bool; crate::NUM_INSTRUCTION_FLAGS] { + let mut flags = [false; crate::NUM_INSTRUCTION_FLAGS]; + flags[crate::InstructionFlags::LeftOperandIsRs1Value] = true; + flags[crate::InstructionFlags::RightOperandIsRs2Value] = true; + flags + } +} + +/// RV64M REM: signed remainder. 
Returns `x` on division by zero, +/// returns 0 when `x == i64::MIN && y == -1`. +#[derive( + Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, +)] +pub struct Rem; + +impl crate::Instruction for Rem { + #[inline] + fn opcode(&self) -> u32 { + opcodes::REM + } + #[inline] + fn name(&self) -> &'static str { + "REM" + } + #[inline] + fn execute(&self, x: u64, y: u64) -> u64 { + let sx = x as i64; + let sy = y as i64; + if sy == 0 { + x + } else if sx == i64::MIN && sy == -1 { + 0 + } else { + sx.wrapping_rem(sy) as u64 + } + } + #[inline] + fn lookup_table(&self) -> Option { + None + } +} + +impl crate::Flags for Rem { + #[inline] + fn circuit_flags(&self) -> [bool; crate::NUM_CIRCUIT_FLAGS] { + [false; crate::NUM_CIRCUIT_FLAGS] + } + #[inline] + fn instruction_flags(&self) -> [bool; crate::NUM_INSTRUCTION_FLAGS] { + let mut flags = [false; crate::NUM_INSTRUCTION_FLAGS]; + flags[crate::InstructionFlags::LeftOperandIsRs1Value] = true; + flags[crate::InstructionFlags::RightOperandIsRs2Value] = true; + flags + } +} + +/// RV64M REMU: unsigned remainder. Returns `x` on division by zero. 
+#[derive( + Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, +)] +pub struct RemU; + +impl crate::Instruction for RemU { + #[inline] + fn opcode(&self) -> u32 { + opcodes::REMU + } + #[inline] + fn name(&self) -> &'static str { + "REMU" + } + #[inline] + fn execute(&self, x: u64, y: u64) -> u64 { + if y == 0 { + x + } else { + x % y + } + } + #[inline] + fn lookup_table(&self) -> Option { + None + } +} + +impl crate::Flags for RemU { + #[inline] + fn circuit_flags(&self) -> [bool; crate::NUM_CIRCUIT_FLAGS] { + [false; crate::NUM_CIRCUIT_FLAGS] + } + #[inline] + fn instruction_flags(&self) -> [bool; crate::NUM_INSTRUCTION_FLAGS] { + let mut flags = [false; crate::NUM_INSTRUCTION_FLAGS]; + flags[crate::InstructionFlags::LeftOperandIsRs1Value] = true; + flags[crate::InstructionFlags::RightOperandIsRs2Value] = true; + flags + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn add_basic() { + assert_eq!(Add.execute(3, 5), 8); + assert_eq!(Add.execute(u64::MAX, 1), 0); // wrapping + } + + #[test] + fn sub_basic() { + assert_eq!(Sub.execute(10, 3), 7); + assert_eq!(Sub.execute(0, 1), u64::MAX); // wrapping + } + + #[test] + fn mul_lower_bits() { + assert_eq!(Mul.execute(6, 7), 42); + assert_eq!(Mul.execute(u64::MAX, 2), u64::MAX - 1); // wrapping + } + + #[test] + fn mulh_upper_signed() { + // 2^63 * 2 signed: (i64::MIN as i128) * 2 = -2^64, upper = -1 + let x = i64::MIN as u64; + assert_eq!(MulH.execute(x, 2), u64::MAX); // -1 in two's complement + + // Small positive numbers: upper bits are 0 + assert_eq!(MulH.execute(100, 200), 0); + } + + #[test] + fn mulhu_upper_unsigned() { + assert_eq!(MulHU.execute(u64::MAX, 2), 1); + assert_eq!(MulHU.execute(100, 200), 0); + } + + #[test] + fn mulhsu_mixed() { + // -1 (signed) * 2 (unsigned) = -2 as i128, upper = -1 + assert_eq!(MulHSU.execute(u64::MAX, 2), u64::MAX); + assert_eq!(MulHSU.execute(100, 200), 0); + } + + #[test] + fn div_signed() { + 
assert_eq!(Div.execute(20u64, 3u64), (20i64 / 3) as u64); + // Negative: -20 / 3 = -6 (truncated toward zero) + assert_eq!(Div.execute((-20i64) as u64, 3u64), (-6i64) as u64); + } + + #[test] + fn div_by_zero() { + assert_eq!(Div.execute(42, 0), u64::MAX); + assert_eq!(DivU.execute(42, 0), u64::MAX); + } + + #[test] + fn div_overflow() { + assert_eq!( + Div.execute(i64::MIN as u64, (-1i64) as u64), + i64::MIN as u64 + ); + } + + #[test] + fn rem_signed() { + assert_eq!(Rem.execute(20, 3), (20i64 % 3) as u64); + assert_eq!(Rem.execute((-20i64) as u64, 3), (-2i64) as u64); + } + + #[test] + fn rem_by_zero() { + assert_eq!(Rem.execute(42, 0), 42); + assert_eq!(RemU.execute(42, 0), 42); + } + + #[test] + fn rem_overflow() { + assert_eq!(Rem.execute(i64::MIN as u64, (-1i64) as u64), 0); + } + + #[test] + fn lui_passthrough() { + assert_eq!(Lui.execute(0xDEAD_0000, 999), 0xDEAD_0000); + } + + #[test] + fn auipc_add() { + assert_eq!(Auipc.execute(0x1000, 0x2000), 0x3000); + } +} diff --git a/crates/jolt-instructions/src/rv/arithmetic_w.rs b/crates/jolt-instructions/src/rv/arithmetic_w.rs new file mode 100644 index 000000000..f5c2df4dc --- /dev/null +++ b/crates/jolt-instructions/src/rv/arithmetic_w.rs @@ -0,0 +1,285 @@ +//! RV64 W-suffix arithmetic instructions operating on the lower 32 bits +//! with sign-extension of the result to 64 bits. + +use crate::opcodes; + +define_instruction!( + /// RV64I ADDW: 32-bit add, sign-extended to 64 bits. + AddW, opcodes::ADDW, "ADDW", + |x, y| (x as i32).wrapping_add(y as i32) as i64 as u64, + circuit: [AddOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], +); + +define_instruction!( + /// RV64I ADDIW: 32-bit add immediate, sign-extended to 64 bits. 
+ AddiW, opcodes::ADDIW, "ADDIW", + |x, y| (x as i32).wrapping_add(y as i32) as i64 as u64, + circuit: [AddOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], +); + +define_instruction!( + /// RV64I SUBW: 32-bit subtract, sign-extended to 64 bits. + SubW, opcodes::SUBW, "SUBW", + |x, y| (x as i32).wrapping_sub(y as i32) as i64 as u64, + circuit: [SubtractOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], +); + +define_instruction!( + /// RV64M MULW: 32-bit multiply, sign-extended to 64 bits. + MulW, opcodes::MULW, "MULW", + |x, y| (x as i32).wrapping_mul(y as i32) as i64 as u64, + circuit: [MultiplyOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], +); + +/// RV64M DIVW: 32-bit signed division, sign-extended to 64 bits. +/// +/// Division by zero returns `u64::MAX`. Overflow (`i32::MIN / -1`) returns `i32::MIN` sign-extended. +#[derive( + Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, +)] +pub struct DivW; + +impl crate::Instruction for DivW { + #[inline] + fn opcode(&self) -> u32 { + opcodes::DIVW + } + #[inline] + fn name(&self) -> &'static str { + "DIVW" + } + #[inline] + fn execute(&self, x: u64, y: u64) -> u64 { + let sx = x as i32; + let sy = y as i32; + if sy == 0 { + u64::MAX + } else if sx == i32::MIN && sy == -1 { + sx as i64 as u64 + } else { + sx.wrapping_div(sy) as i64 as u64 + } + } + #[inline] + fn lookup_table(&self) -> Option { + None + } +} + +impl crate::Flags for DivW { + #[inline] + fn circuit_flags(&self) -> [bool; crate::NUM_CIRCUIT_FLAGS] { + [false; crate::NUM_CIRCUIT_FLAGS] + } + #[inline] + fn instruction_flags(&self) -> [bool; crate::NUM_INSTRUCTION_FLAGS] { + let mut flags = [false; crate::NUM_INSTRUCTION_FLAGS]; + flags[crate::InstructionFlags::LeftOperandIsRs1Value] = true; + flags[crate::InstructionFlags::RightOperandIsRs2Value] = true; + flags + } +} + +/// 
RV64M DIVUW: 32-bit unsigned division, sign-extended to 64 bits. +/// Returns `u64::MAX` on division by zero. +#[derive( + Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, +)] +pub struct DivUW; + +impl crate::Instruction for DivUW { + #[inline] + fn opcode(&self) -> u32 { + opcodes::DIVUW + } + #[inline] + fn name(&self) -> &'static str { + "DIVUW" + } + #[inline] + fn execute(&self, x: u64, y: u64) -> u64 { + let ux = x as u32; + let uy = y as u32; + if uy == 0 { + u64::MAX + } else { + (ux / uy) as i32 as i64 as u64 + } + } + #[inline] + fn lookup_table(&self) -> Option { + None + } +} + +impl crate::Flags for DivUW { + #[inline] + fn circuit_flags(&self) -> [bool; crate::NUM_CIRCUIT_FLAGS] { + [false; crate::NUM_CIRCUIT_FLAGS] + } + #[inline] + fn instruction_flags(&self) -> [bool; crate::NUM_INSTRUCTION_FLAGS] { + let mut flags = [false; crate::NUM_INSTRUCTION_FLAGS]; + flags[crate::InstructionFlags::LeftOperandIsRs1Value] = true; + flags[crate::InstructionFlags::RightOperandIsRs2Value] = true; + flags + } +} + +/// RV64M REMW: 32-bit signed remainder, sign-extended to 64 bits. +/// Returns `x` (truncated to 32 bits, sign-extended) on division by zero. 
+#[derive( + Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, +)] +pub struct RemW; + +impl crate::Instruction for RemW { + #[inline] + fn opcode(&self) -> u32 { + opcodes::REMW + } + #[inline] + fn name(&self) -> &'static str { + "REMW" + } + #[inline] + fn execute(&self, x: u64, y: u64) -> u64 { + let sx = x as i32; + let sy = y as i32; + if sy == 0 { + sx as i64 as u64 + } else if sx == i32::MIN && sy == -1 { + 0 + } else { + sx.wrapping_rem(sy) as i64 as u64 + } + } + #[inline] + fn lookup_table(&self) -> Option { + None + } +} + +impl crate::Flags for RemW { + #[inline] + fn circuit_flags(&self) -> [bool; crate::NUM_CIRCUIT_FLAGS] { + [false; crate::NUM_CIRCUIT_FLAGS] + } + #[inline] + fn instruction_flags(&self) -> [bool; crate::NUM_INSTRUCTION_FLAGS] { + let mut flags = [false; crate::NUM_INSTRUCTION_FLAGS]; + flags[crate::InstructionFlags::LeftOperandIsRs1Value] = true; + flags[crate::InstructionFlags::RightOperandIsRs2Value] = true; + flags + } +} + +/// RV64M REMUW: 32-bit unsigned remainder, sign-extended to 64 bits. +/// Returns `x` (truncated to 32 bits, sign-extended) on division by zero. 
+#[derive( + Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, +)] +pub struct RemUW; + +impl crate::Instruction for RemUW { + #[inline] + fn opcode(&self) -> u32 { + opcodes::REMUW + } + #[inline] + fn name(&self) -> &'static str { + "REMUW" + } + #[inline] + fn execute(&self, x: u64, y: u64) -> u64 { + let ux = x as u32; + let uy = y as u32; + if uy == 0 { + ux as i32 as i64 as u64 + } else { + (ux % uy) as i32 as i64 as u64 + } + } + #[inline] + fn lookup_table(&self) -> Option { + None + } +} + +impl crate::Flags for RemUW { + #[inline] + fn circuit_flags(&self) -> [bool; crate::NUM_CIRCUIT_FLAGS] { + [false; crate::NUM_CIRCUIT_FLAGS] + } + #[inline] + fn instruction_flags(&self) -> [bool; crate::NUM_INSTRUCTION_FLAGS] { + let mut flags = [false; crate::NUM_INSTRUCTION_FLAGS]; + flags[crate::InstructionFlags::LeftOperandIsRs1Value] = true; + flags[crate::InstructionFlags::RightOperandIsRs2Value] = true; + flags + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn addw_sign_extends() { + // 0x7FFF_FFFF + 1 = 0x8000_0000 as i32 = -2147483648, sign-extended + let result = AddW.execute(0x7FFF_FFFF, 1); + assert_eq!(result, 0xFFFF_FFFF_8000_0000); + } + + #[test] + fn subw_basic() { + assert_eq!(SubW.execute(10, 3), 7); + assert_eq!(SubW.execute(0, 1), 0xFFFF_FFFF_FFFF_FFFF); // -1 sign-extended + } + + #[test] + fn mulw_basic() { + assert_eq!(MulW.execute(6, 7), 42); + } + + #[test] + fn divw_by_zero() { + assert_eq!(DivW.execute(42, 0), u64::MAX); + } + + #[test] + fn divw_overflow() { + assert_eq!( + DivW.execute(i32::MIN as u64, (-1i32) as u64), + i32::MIN as i64 as u64 + ); + } + + #[test] + fn divuw_basic() { + assert_eq!(DivUW.execute(10, 3), 3); + assert_eq!(DivUW.execute(10, 0), u64::MAX); + } + + #[test] + fn remw_basic() { + assert_eq!(RemW.execute(10, 3), 1); + assert_eq!(RemW.execute(10, 0), 10); + } + + #[test] + fn remuw_basic() { + assert_eq!(RemUW.execute(10, 3), 1); + 
assert_eq!(RemUW.execute(10, 0), 10); + } + + #[test] + fn remw_overflow() { + assert_eq!(RemW.execute(i32::MIN as u64, (-1i32) as u64), 0); + } +} diff --git a/crates/jolt-instructions/src/rv/branch.rs b/crates/jolt-instructions/src/rv/branch.rs new file mode 100644 index 000000000..1be35ddc0 --- /dev/null +++ b/crates/jolt-instructions/src/rv/branch.rs @@ -0,0 +1,87 @@ +//! RV64I conditional branch instructions. +//! +//! Each returns 1 if the branch condition is true, 0 otherwise. +//! The actual PC update is handled by the VM, not the instruction itself. + +use crate::opcodes; + +define_instruction!( + /// RV64I BEQ: branch if equal. Returns 1 when `rs1 == rs2`. + Beq, opcodes::BEQ, "BEQ", + |x, y| u64::from(x == y), + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value, Branch], + table: Equal, +); + +define_instruction!( + /// RV64I BNE: branch if not equal. Returns 1 when `rs1 != rs2`. + Bne, opcodes::BNE, "BNE", + |x, y| u64::from(x != y), + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value, Branch], + table: NotEqual, +); + +define_instruction!( + /// RV64I BLT: branch if less than (signed). + Blt, opcodes::BLT, "BLT", + |x, y| u64::from((x as i64) < (y as i64)), + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value, Branch], + table: SignedLessThan, +); + +define_instruction!( + /// RV64I BGE: branch if greater than or equal (signed). + Bge, opcodes::BGE, "BGE", + |x, y| u64::from((x as i64) >= (y as i64)), + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value, Branch], + table: SignedGreaterThanEqual, +); + +define_instruction!( + /// RV64I BLTU: branch if less than (unsigned). + BltU, opcodes::BLTU, "BLTU", + |x, y| u64::from(x < y), + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value, Branch], + table: UnsignedLessThan, +); + +define_instruction!( + /// RV64I BGEU: branch if greater than or equal (unsigned). 
+ BgeU, opcodes::BGEU, "BGEU", + |x, y| u64::from(x >= y), + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value, Branch], + table: UnsignedGreaterThanEqual, +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn beq_bne() { + assert_eq!(Beq.execute(5, 5), 1); + assert_eq!(Beq.execute(5, 6), 0); + assert_eq!(Bne.execute(5, 5), 0); + assert_eq!(Bne.execute(5, 6), 1); + } + + #[test] + fn blt_bge_signed() { + let neg1 = (-1i64) as u64; + assert_eq!(Blt.execute(neg1, 1), 1); + assert_eq!(Blt.execute(1, neg1), 0); + assert_eq!(Bge.execute(neg1, 1), 0); + assert_eq!(Bge.execute(1, neg1), 1); + assert_eq!(Bge.execute(5, 5), 1); + } + + #[test] + fn bltu_bgeu_unsigned() { + assert_eq!(BltU.execute(1, 2), 1); + assert_eq!(BltU.execute(2, 1), 0); + assert_eq!(BgeU.execute(2, 1), 1); + assert_eq!(BgeU.execute(1, 2), 0); + assert_eq!(BgeU.execute(3, 3), 1); + } +} diff --git a/crates/jolt-instructions/src/rv/compare.rs b/crates/jolt-instructions/src/rv/compare.rs new file mode 100644 index 000000000..cb45aaaa1 --- /dev/null +++ b/crates/jolt-instructions/src/rv/compare.rs @@ -0,0 +1,66 @@ +//! RV64I comparison instructions that write 1 or 0 to the destination register. + +use crate::opcodes; + +define_instruction!( + /// RV64I SLT: set if less than (signed). `rd = (rs1 < rs2) ? 1 : 0`. + Slt, opcodes::SLT, "SLT", + |x, y| u64::from((x as i64) < (y as i64)), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: SignedLessThan, +); + +define_instruction!( + /// RV64I SLTI: set if less than immediate (signed). + SltI, opcodes::SLTI, "SLTI", + |x, y| u64::from((x as i64) < (y as i64)), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: SignedLessThan, +); + +define_instruction!( + /// RV64I SLTU: set if less than (unsigned). `rd = (rs1 < rs2) ? 1 : 0`. 
+ SltU, opcodes::SLTU, "SLTU", + |x, y| u64::from(x < y), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: UnsignedLessThan, +); + +define_instruction!( + /// RV64I SLTIU: set if less than immediate (unsigned). + SltIU, opcodes::SLTIU, "SLTIU", + |x, y| u64::from(x < y), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: UnsignedLessThan, +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn slt_signed() { + assert_eq!(Slt.execute((-1i64) as u64, 1), 1); + assert_eq!(Slt.execute(1, (-1i64) as u64), 0); + assert_eq!(Slt.execute(5, 5), 0); + } + + #[test] + fn sltu_unsigned() { + assert_eq!(SltU.execute(1, 2), 1); + assert_eq!(SltU.execute(2, 1), 0); + // -1 as u64 is MAX, so it's greater + assert_eq!(SltU.execute((-1i64) as u64, 1), 0); + } + + #[test] + fn immediate_variants_match() { + assert_eq!(Slt.execute(3, 5), SltI.execute(3, 5)); + assert_eq!(SltU.execute(3, 5), SltIU.execute(3, 5)); + } +} diff --git a/crates/jolt-instructions/src/rv/jump.rs b/crates/jolt-instructions/src/rv/jump.rs new file mode 100644 index 000000000..d06254fdd --- /dev/null +++ b/crates/jolt-instructions/src/rv/jump.rs @@ -0,0 +1,51 @@ +//! RV64I jump instructions. + +use crate::opcodes; + +define_instruction!( + /// RV64I JAL: jump and link. `rd = PC + 4; PC = PC + imm`. + /// The execute function computes the jump target `PC + imm`. + Jal, opcodes::JAL, "JAL", + |x, y| x.wrapping_add(y), + circuit: [AddOperands, Jump], + instruction: [LeftOperandIsPC, RightOperandIsImm], + table: RangeCheck, +); + +define_instruction!( + /// RV64I JALR: jump and link register. `rd = PC + 4; PC = (rs1 + imm) & !1`. + /// The execute function computes the jump target `(rs1 + imm) & !1`. 
+ Jalr, opcodes::JALR, "JALR", + |x, y| x.wrapping_add(y) & !1, + circuit: [AddOperands, Jump], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: RangeCheckAligned, +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn jal_basic() { + assert_eq!(Jal.execute(0x1000, 0x100), 0x1100); + } + + #[test] + fn jal_wrapping() { + assert_eq!(Jal.execute(u64::MAX, 1), 0); + } + + #[test] + fn jalr_aligns() { + // Result should clear bit 0 + assert_eq!(Jalr.execute(0x1001, 0x100), 0x1100); // 0x1101 & !1 = 0x1100 + assert_eq!(Jalr.execute(0x1000, 0x101), 0x1100); // 0x1101 & !1 = 0x1100 + } + + #[test] + fn jalr_even_unchanged() { + assert_eq!(Jalr.execute(0x1000, 0x100), 0x1100); + } +} diff --git a/crates/jolt-instructions/src/rv/load.rs b/crates/jolt-instructions/src/rv/load.rs new file mode 100644 index 000000000..77ddffae7 --- /dev/null +++ b/crates/jolt-instructions/src/rv/load.rs @@ -0,0 +1,100 @@ +//! RV64I load instructions that extract and extend bytes from a memory word. +//! +//! In Jolt's execution model, `x` contains the loaded value from memory +//! and the instruction performs sign/zero extension to 64 bits. + +use crate::opcodes; + +define_instruction!( + /// RV64I LB: load byte, sign-extended to 64 bits. + Lb, opcodes::LB, "LB", + |x, _y| (x as i8) as i64 as u64, + circuit: [Load], +); + +define_instruction!( + /// RV64I LBU: load byte, zero-extended to 64 bits. + Lbu, opcodes::LBU, "LBU", + |x, _y| x & 0xFF, + circuit: [Load], +); + +define_instruction!( + /// RV64I LH: load halfword (16 bits), sign-extended to 64 bits. + Lh, opcodes::LH, "LH", + |x, _y| (x as i16) as i64 as u64, + circuit: [Load], +); + +define_instruction!( + /// RV64I LHU: load halfword, zero-extended to 64 bits. + Lhu, opcodes::LHU, "LHU", + |x, _y| x & 0xFFFF, + circuit: [Load], +); + +define_instruction!( + /// RV64I LW: load word (32 bits), sign-extended to 64 bits. 
+ Lw, opcodes::LW, "LW", + |x, _y| (x as i32) as i64 as u64, + circuit: [Load], +); + +define_instruction!( + /// RV64I LWU: load word, zero-extended to 64 bits. + Lwu, opcodes::LWU, "LWU", + |x, _y| x & 0xFFFF_FFFF, + circuit: [Load], +); + +define_instruction!( + /// RV64I LD: load doubleword (64 bits). Identity operation. + Ld, opcodes::LD, "LD", + |x, _y| x, + circuit: [Load], +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn lb_sign_extends() { + assert_eq!(Lb.execute(0x80, 0), 0xFFFF_FFFF_FFFF_FF80); // -128 + assert_eq!(Lb.execute(0x7F, 0), 0x7F); // +127 + } + + #[test] + fn lbu_zero_extends() { + assert_eq!(Lbu.execute(0x80, 0), 0x80); + assert_eq!(Lbu.execute(0xFF_FF, 0), 0xFF); + } + + #[test] + fn lh_sign_extends() { + assert_eq!(Lh.execute(0x8000, 0), 0xFFFF_FFFF_FFFF_8000); + assert_eq!(Lh.execute(0x7FFF, 0), 0x7FFF); + } + + #[test] + fn lhu_zero_extends() { + assert_eq!(Lhu.execute(0x8000, 0), 0x8000); + } + + #[test] + fn lw_sign_extends() { + assert_eq!(Lw.execute(0x8000_0000, 0), 0xFFFF_FFFF_8000_0000); + assert_eq!(Lw.execute(0x7FFF_FFFF, 0), 0x7FFF_FFFF); + } + + #[test] + fn lwu_zero_extends() { + assert_eq!(Lwu.execute(0x8000_0000, 0), 0x8000_0000); + } + + #[test] + fn ld_identity() { + assert_eq!(Ld.execute(0xDEAD_BEEF_CAFE_BABE, 0), 0xDEAD_BEEF_CAFE_BABE); + } +} diff --git a/crates/jolt-instructions/src/rv/logic.rs b/crates/jolt-instructions/src/rv/logic.rs new file mode 100644 index 000000000..3bd6835f7 --- /dev/null +++ b/crates/jolt-instructions/src/rv/logic.rs @@ -0,0 +1,104 @@ +//! RV64I bitwise logic instructions. + +use crate::opcodes; + +define_instruction!( + /// RV64I AND: bitwise AND of two registers. + And, opcodes::AND, "AND", + |x, y| x & y, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: And, +); + +define_instruction!( + /// RV64I ANDI: bitwise AND with sign-extended immediate. 
+ AndI, opcodes::ANDI, "ANDI", + |x, y| x & y, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: And, +); + +define_instruction!( + /// RV64I OR: bitwise OR of two registers. + Or, opcodes::OR, "OR", + |x, y| x | y, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: Or, +); + +define_instruction!( + /// RV64I ORI: bitwise OR with sign-extended immediate. + OrI, opcodes::ORI, "ORI", + |x, y| x | y, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: Or, +); + +define_instruction!( + /// RV64I XOR: bitwise exclusive OR of two registers. + Xor, opcodes::XOR, "XOR", + |x, y| x ^ y, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: Xor, +); + +define_instruction!( + /// RV64I XORI: bitwise exclusive OR with sign-extended immediate. + XorI, opcodes::XORI, "XORI", + |x, y| x ^ y, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: Xor, +); + +define_instruction!( + /// Zbb ANDN: bitwise AND-NOT. `rd = rs1 & ~rs2`. 
+ Andn, opcodes::ANDN, "ANDN", + |x, y| x & !y, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: Andn, +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn and_basic() { + assert_eq!(And.execute(0xFF00, 0x0FF0), 0x0F00); + assert_eq!(And.execute(u64::MAX, 0), 0); + } + + #[test] + fn or_basic() { + assert_eq!(Or.execute(0xFF00, 0x00FF), 0xFFFF); + assert_eq!(Or.execute(0, 0), 0); + } + + #[test] + fn xor_basic() { + assert_eq!(Xor.execute(0xFF, 0xFF), 0); + assert_eq!(Xor.execute(0xFF, 0x00), 0xFF); + } + + #[test] + fn immediate_variants_match() { + assert_eq!(And.execute(0xAB, 0xCD), AndI.execute(0xAB, 0xCD)); + assert_eq!(Or.execute(0xAB, 0xCD), OrI.execute(0xAB, 0xCD)); + assert_eq!(Xor.execute(0xAB, 0xCD), XorI.execute(0xAB, 0xCD)); + } + + #[test] + fn andn_basic() { + assert_eq!(Andn.execute(0xFF, 0x0F), 0xF0); + assert_eq!(Andn.execute(0xFF, 0xFF), 0); + assert_eq!(Andn.execute(0xFF, 0), 0xFF); + } +} diff --git a/crates/jolt-instructions/src/rv/mod.rs b/crates/jolt-instructions/src/rv/mod.rs new file mode 100644 index 000000000..b86cf6f1d --- /dev/null +++ b/crates/jolt-instructions/src/rv/mod.rs @@ -0,0 +1,13 @@ +//! RISC-V instruction implementations for the RV64IMAC base ISA. + +pub mod arithmetic; +pub mod arithmetic_w; +pub mod branch; +pub mod compare; +pub mod jump; +pub mod load; +pub mod logic; +pub mod shift; +pub mod shift_w; +pub mod store; +pub mod system; diff --git a/crates/jolt-instructions/src/rv/shift.rs b/crates/jolt-instructions/src/rv/shift.rs new file mode 100644 index 000000000..d1c458bca --- /dev/null +++ b/crates/jolt-instructions/src/rv/shift.rs @@ -0,0 +1,95 @@ +//! RV64I shift instructions operating on full 64-bit values. +//! Shift amount is masked to 6 bits (0..63) per the RISC-V spec. + +use crate::opcodes; + +define_instruction!( + /// RV64I SLL: shift left logical. Shift amount from lower 6 bits of `y`. 
+ Sll, opcodes::SLL, "SLL", + |x, y| x << (y & 63), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], +); + +define_instruction!( + /// RV64I SLLI: shift left logical by immediate. Immediate already masked. + SllI, opcodes::SLLI, "SLLI", + |x, y| x << (y & 63), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], +); + +define_instruction!( + /// RV64I SRL: shift right logical. Shift amount from lower 6 bits of `y`. + Srl, opcodes::SRL, "SRL", + |x, y| x >> (y & 63), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], +); + +define_instruction!( + /// RV64I SRLI: shift right logical by immediate. + SrlI, opcodes::SRLI, "SRLI", + |x, y| x >> (y & 63), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], +); + +define_instruction!( + /// RV64I SRA: shift right arithmetic. Preserves sign bit. + Sra, opcodes::SRA, "SRA", + |x, y| ((x as i64) >> (y & 63)) as u64, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], +); + +define_instruction!( + /// RV64I SRAI: shift right arithmetic by immediate. 
+ SraI, opcodes::SRAI, "SRAI", + |x, y| ((x as i64) >> (y & 63)) as u64, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn sll_basic() { + assert_eq!(Sll.execute(1, 10), 1024); + assert_eq!(Sll.execute(1, 63), 1 << 63); + } + + #[test] + fn sll_masks_shift_amount() { + // Shift by 64 should wrap to shift by 0 + assert_eq!(Sll.execute(1, 64), 1); + } + + #[test] + fn srl_basic() { + assert_eq!(Srl.execute(1024, 10), 1); + assert_eq!(Srl.execute(u64::MAX, 63), 1); + } + + #[test] + fn sra_sign_extends() { + let neg = (-1024i64) as u64; + let result = Sra.execute(neg, 4); + assert_eq!(result, (-64i64) as u64); + } + + #[test] + fn sra_positive() { + assert_eq!(Sra.execute(1024, 4), 64); + } + + #[test] + fn immediate_variants_match() { + assert_eq!(Sll.execute(42, 5), SllI.execute(42, 5)); + assert_eq!(Srl.execute(42, 5), SrlI.execute(42, 5)); + assert_eq!(Sra.execute(42, 5), SraI.execute(42, 5)); + } +} diff --git a/crates/jolt-instructions/src/rv/shift_w.rs b/crates/jolt-instructions/src/rv/shift_w.rs new file mode 100644 index 000000000..02d976869 --- /dev/null +++ b/crates/jolt-instructions/src/rv/shift_w.rs @@ -0,0 +1,90 @@ +//! RV64 W-suffix shift instructions operating on the lower 32 bits +//! with sign-extension of the result to 64 bits. +//! Shift amount is masked to 5 bits (0..31). + +use crate::opcodes; + +define_instruction!( + /// RV64I SLLW: 32-bit shift left logical, sign-extended to 64 bits. + SllW, opcodes::SLLW, "SLLW", + |x, y| ((x as u32) << (y & 31)) as i32 as i64 as u64, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], +); + +define_instruction!( + /// RV64I SLLIW: 32-bit shift left logical by immediate, sign-extended. 
+ SllIW, opcodes::SLLIW, "SLLIW", + |x, y| ((x as u32) << (y & 31)) as i32 as i64 as u64, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], +); + +define_instruction!( + /// RV64I SRLW: 32-bit shift right logical, sign-extended to 64 bits. + SrlW, opcodes::SRLW, "SRLW", + |x, y| ((x as u32) >> (y & 31)) as i32 as i64 as u64, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], +); + +define_instruction!( + /// RV64I SRLIW: 32-bit shift right logical by immediate, sign-extended. + SrlIW, opcodes::SRLIW, "SRLIW", + |x, y| ((x as u32) >> (y & 31)) as i32 as i64 as u64, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], +); + +define_instruction!( + /// RV64I SRAW: 32-bit shift right arithmetic, sign-extended to 64 bits. + SraW, opcodes::SRAW, "SRAW", + |x, y| ((x as i32) >> (y & 31)) as i64 as u64, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], +); + +define_instruction!( + /// RV64I SRAIW: 32-bit shift right arithmetic by immediate, sign-extended. 
+ SraIW, opcodes::SRAIW, "SRAIW", + |x, y| ((x as i32) >> (y & 31)) as i64 as u64, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn sllw_sign_extends() { + // Shifting 1 left by 31 gives 0x8000_0000 which is negative as i32 + let result = SllW.execute(1, 31); + assert_eq!(result, 0xFFFF_FFFF_8000_0000); + } + + #[test] + fn sllw_masks_to_5_bits() { + assert_eq!(SllW.execute(1, 32), 1); // 32 & 31 = 0 + } + + #[test] + fn srlw_basic() { + assert_eq!(SrlW.execute(0x8000_0000, 31), 1); + } + + #[test] + fn sraw_sign_extends() { + let neg = 0xFFFF_FFFF_8000_0000u64; // -2^31 sign-extended + let result = SraW.execute(neg, 1); + assert_eq!(result, 0xFFFF_FFFF_C000_0000); // -2^30 sign-extended + } + + #[test] + fn immediate_variants_match() { + assert_eq!(SllW.execute(42, 5), SllIW.execute(42, 5)); + assert_eq!(SrlW.execute(42, 5), SrlIW.execute(42, 5)); + assert_eq!(SraW.execute(42, 5), SraIW.execute(42, 5)); + } +} diff --git a/crates/jolt-instructions/src/rv/store.rs b/crates/jolt-instructions/src/rv/store.rs new file mode 100644 index 000000000..8d092ff26 --- /dev/null +++ b/crates/jolt-instructions/src/rv/store.rs @@ -0,0 +1,60 @@ +//! RV64I store instructions that mask a value to the appropriate width. +//! +//! In Jolt's execution model, `x` is the value to store and the instruction +//! truncates it to the target memory width. + +use crate::opcodes; + +define_instruction!( + /// RV64I SB: store byte (lowest 8 bits). + Sb, opcodes::SB, "SB", + |x, _y| x & 0xFF, + circuit: [Store], +); + +define_instruction!( + /// RV64I SH: store halfword (lowest 16 bits). + Sh, opcodes::SH, "SH", + |x, _y| x & 0xFFFF, + circuit: [Store], +); + +define_instruction!( + /// RV64I SW: store word (lowest 32 bits). 
+ Sw, opcodes::SW, "SW", + |x, _y| x & 0xFFFF_FFFF, + circuit: [Store], +); + +define_instruction!( + /// RV64I SD: store doubleword (full 64 bits). Identity operation. + Sd, opcodes::SD, "SD", + |x, _y| x, + circuit: [Store], +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn sb_masks_to_byte() { + assert_eq!(Sb.execute(0xDEAD_BEEF, 0), 0xEF); + } + + #[test] + fn sh_masks_to_halfword() { + assert_eq!(Sh.execute(0xDEAD_BEEF, 0), 0xBEEF); + } + + #[test] + fn sw_masks_to_word() { + assert_eq!(Sw.execute(0xDEAD_BEEF_CAFE_BABE, 0), 0xCAFE_BABE); + } + + #[test] + fn sd_identity() { + assert_eq!(Sd.execute(0xDEAD_BEEF_CAFE_BABE, 0), 0xDEAD_BEEF_CAFE_BABE); + } +} diff --git a/crates/jolt-instructions/src/rv/system.rs b/crates/jolt-instructions/src/rv/system.rs new file mode 100644 index 000000000..dcc4583fd --- /dev/null +++ b/crates/jolt-instructions/src/rv/system.rs @@ -0,0 +1,45 @@ +//! RV64I system and synchronization instructions. +//! +//! These instructions produce side effects (syscalls, memory fences) that +//! are handled by the VM. Their `execute` returns 0 as a no-op. + +use crate::opcodes; + +define_instruction!( + /// RV64I ECALL: environment call (syscall). Returns 0. + Ecall, opcodes::ECALL, "ECALL", + |_x, _y| 0, +); + +define_instruction!( + /// RV64I EBREAK: breakpoint trap. Returns 0. + Ebreak, opcodes::EBREAK, "EBREAK", + |_x, _y| 0, +); + +define_instruction!( + /// RV64I FENCE: memory ordering fence. Returns 0. + Fence, opcodes::FENCE, "FENCE", + |_x, _y| 0, +); + +define_instruction!( + /// No-operation pseudo-instruction. Returns 0. 
+ Noop, opcodes::NOOP, "NOOP", + |_x, _y| 0, + instruction: [IsNoop], +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn system_instructions_return_zero() { + assert_eq!(Ecall.execute(42, 99), 0); + assert_eq!(Ebreak.execute(42, 99), 0); + assert_eq!(Fence.execute(42, 99), 0); + assert_eq!(Noop.execute(42, 99), 0); + } +} diff --git a/crates/jolt-instructions/src/tables/and.rs b/crates/jolt-instructions/src/tables/and.rs new file mode 100644 index 000000000..9dc148790 --- /dev/null +++ b/crates/jolt-instructions/src/tables/and.rs @@ -0,0 +1,45 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct AndTable; + +impl LookupTable for AndTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x, y) = uninterleave_bits(index); + x & y + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let mut result = F::zero(); + for i in 0..XLEN { + let x_i = r[2 * i]; + let y_i = r[2 * i + 1]; + result += F::from_u64(1u64 << (XLEN - 1 - i)) * x_i * y_i; + } + result + } +} + +impl PrefixSuffixDecomposition for AndTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::And] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + let [one, and] = suffixes.try_into().unwrap(); + prefixes[Prefixes::And] * one + and + } +} diff --git a/crates/jolt-instructions/src/tables/andn.rs b/crates/jolt-instructions/src/tables/andn.rs new file mode 100644 index 000000000..1e47abbe3 --- /dev/null +++ b/crates/jolt-instructions/src/tables/andn.rs @@ -0,0 
+1,45 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct AndnTable; + +impl LookupTable for AndnTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x, y) = uninterleave_bits(index); + x & !y + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let mut result = F::zero(); + for i in 0..XLEN { + let x_i = r[2 * i]; + let y_i = r[2 * i + 1]; + result += F::from_u64(1u64 << (XLEN - 1 - i)) * x_i * (F::one() - y_i); + } + result + } +} + +impl PrefixSuffixDecomposition for AndnTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::NotAnd] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + let [one, andn] = suffixes.try_into().unwrap(); + prefixes[Prefixes::Andn] * one + andn + } +} diff --git a/crates/jolt-instructions/src/tables/equal.rs b/crates/jolt-instructions/src/tables/equal.rs new file mode 100644 index 000000000..63faed65d --- /dev/null +++ b/crates/jolt-instructions/src/tables/equal.rs @@ -0,0 +1,46 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct EqualTable; + +impl LookupTable for EqualTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x, y) = 
uninterleave_bits(index); + (x == y).into() + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert!(r.len().is_multiple_of(2)); + let mut result = F::one(); + for i in (0..r.len()).step_by(2) { + let x_i = r[i]; + let y_i = r[i + 1]; + result *= x_i * y_i + (F::one() - x_i) * (F::one() - y_i); + } + result + } +} + +impl PrefixSuffixDecomposition for EqualTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::Eq] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [eq] = suffixes.try_into().unwrap(); + prefixes[Prefixes::Eq] * eq + } +} diff --git a/crates/jolt-instructions/src/tables/halfword_alignment.rs b/crates/jolt-instructions/src/tables/halfword_alignment.rs new file mode 100644 index 000000000..342a5a548 --- /dev/null +++ b/crates/jolt-instructions/src/tables/halfword_alignment.rs @@ -0,0 +1,38 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct HalfwordAlignmentTable; + +impl LookupTable for HalfwordAlignmentTable { + fn materialize_entry(&self, index: u128) -> u64 { + (index.is_multiple_of(2)).into() + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + let lsb = r[r.len() - 1]; + F::one() - lsb + } +} + +impl PrefixSuffixDecomposition for HalfwordAlignmentTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::Lsb] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [one, lsb] = suffixes.try_into().unwrap(); + 
one - prefixes[Prefixes::Lsb] * lsb + } +} diff --git a/crates/jolt-instructions/src/tables/lookup_table_tests.rs b/crates/jolt-instructions/src/tables/lookup_table_tests.rs new file mode 100644 index 000000000..057f6707a --- /dev/null +++ b/crates/jolt-instructions/src/tables/lookup_table_tests.rs @@ -0,0 +1,218 @@ +#![allow(unused_results)] + +//! Comprehensive lookup table correctness tests. +//! +//! Three test tiers per table: +//! 1. `mle_full_hypercube` — exhaustive 2^16 check at XLEN=8 +//! 2. `mle_random` — 1000 random points at XLEN=64 +//! 3. `prefix_suffix` — sparse-dense decomposition vs MLE across all sumcheck rounds + +use jolt_field::Fr; + +use super::test_utils::{mle_full_hypercube_test, mle_random_test, prefix_suffix_test}; + +use super::and::AndTable; +use super::andn::AndnTable; +use super::equal::EqualTable; +use super::halfword_alignment::HalfwordAlignmentTable; +use super::lower_half_word::LowerHalfWordTable; +use super::movsign::MovsignTable; +use super::mulu_no_overflow::MulUNoOverflowTable; +use super::not_equal::NotEqualTable; +use super::or::OrTable; +use super::pow2::Pow2Table; +use super::pow2_w::Pow2WTable; +use super::range_check::RangeCheckTable; +use super::range_check_aligned::RangeCheckAlignedTable; +use super::shift_right_bitmask::ShiftRightBitmaskTable; +use super::sign_extend_half_word::SignExtendHalfWordTable; +use super::signed_greater_than_equal::SignedGreaterThanEqualTable; +use super::signed_less_than::SignedLessThanTable; +use super::unsigned_greater_than_equal::UnsignedGreaterThanEqualTable; +use super::unsigned_less_than::UnsignedLessThanTable; +use super::unsigned_less_than_equal::UnsignedLessThanEqualTable; +use super::upper_word::UpperWordTable; +use super::valid_div0::ValidDiv0Table; +use super::valid_signed_remainder::ValidSignedRemainderTable; +use super::valid_unsigned_remainder::ValidUnsignedRemainderTable; +use super::virtual_change_divisor::VirtualChangeDivisorTable; +use 
super::virtual_change_divisor_w::VirtualChangeDivisorWTable; +use super::virtual_rev8w::VirtualRev8WTable; +use super::virtual_rotr::VirtualRotrTable; +use super::virtual_rotrw::VirtualRotrWTable; +use super::virtual_sra::VirtualSRATable; +use super::virtual_srl::VirtualSRLTable; +use super::virtual_xor_rot::VirtualXORROTTable; +use super::virtual_xor_rotw::VirtualXORROTWTable; +use super::word_alignment::WordAlignmentTable; +use super::xor::XorTable; + +macro_rules! table_tests { + ($mod:ident, $table8:ty, $table64:ty) => { + mod $mod { + use super::*; + + #[test] + fn mle_full_hypercube() { + mle_full_hypercube_test::(); + } + + #[test] + fn mle_random() { + mle_random_test::<64, Fr, $table64>(); + } + + #[test] + fn prefix_suffix() { + prefix_suffix_test::<64, Fr, $table64>(); + } + } + }; +} + +// Arithmetic / range-check +table_tests!(range_check, RangeCheckTable<8>, RangeCheckTable<64>); +table_tests!( + range_check_aligned, + RangeCheckAlignedTable<8>, + RangeCheckAlignedTable<64> +); + +// Bitwise +table_tests!(and, AndTable<8>, AndTable<64>); +table_tests!(andn, AndnTable<8>, AndnTable<64>); +table_tests!(or, OrTable<8>, OrTable<64>); +table_tests!(xor, XorTable<8>, XorTable<64>); + +// Comparison +table_tests!(equal, EqualTable<8>, EqualTable<64>); +table_tests!(not_equal, NotEqualTable<8>, NotEqualTable<64>); +table_tests!( + signed_less_than, + SignedLessThanTable<8>, + SignedLessThanTable<64> +); +table_tests!( + unsigned_less_than, + UnsignedLessThanTable<8>, + UnsignedLessThanTable<64> +); +table_tests!( + signed_greater_than_equal, + SignedGreaterThanEqualTable<8>, + SignedGreaterThanEqualTable<64> +); +table_tests!( + unsigned_greater_than_equal, + UnsignedGreaterThanEqualTable<8>, + UnsignedGreaterThanEqualTable<64> +); +table_tests!( + unsigned_less_than_equal, + UnsignedLessThanEqualTable<8>, + UnsignedLessThanEqualTable<64> +); + +// Word extraction +table_tests!(upper_word, UpperWordTable<8>, UpperWordTable<64>); +table_tests!( + 
lower_half_word, + LowerHalfWordTable<8>, + LowerHalfWordTable<64> +); +table_tests!( + sign_extend_half_word, + SignExtendHalfWordTable<8>, + SignExtendHalfWordTable<64> +); + +// Sign/conditional +table_tests!(movsign, MovsignTable<8>, MovsignTable<64>); + +// Power of 2 +table_tests!(pow2, Pow2Table<8>, Pow2Table<64>); +table_tests!(pow2_w, Pow2WTable<8>, Pow2WTable<64>); + +// Shift +table_tests!( + shift_right_bitmask, + ShiftRightBitmaskTable<8>, + ShiftRightBitmaskTable<64> +); +table_tests!(virtual_srl, VirtualSRLTable<8>, VirtualSRLTable<64>); +table_tests!(virtual_sra, VirtualSRATable<8>, VirtualSRATable<64>); +table_tests!(virtual_rotr, VirtualRotrTable<8>, VirtualRotrTable<64>); +table_tests!(virtual_rotrw, VirtualRotrWTable<8>, VirtualRotrWTable<64>); + +// Division validation +table_tests!(valid_div0, ValidDiv0Table<8>, ValidDiv0Table<64>); +table_tests!( + valid_unsigned_remainder, + ValidUnsignedRemainderTable<8>, + ValidUnsignedRemainderTable<64> +); +table_tests!( + valid_signed_remainder, + ValidSignedRemainderTable<8>, + ValidSignedRemainderTable<64> +); +table_tests!( + virtual_change_divisor, + VirtualChangeDivisorTable<8>, + VirtualChangeDivisorTable<64> +); +table_tests!( + virtual_change_divisor_w, + VirtualChangeDivisorWTable<8>, + VirtualChangeDivisorWTable<64> +); + +// Alignment +table_tests!( + halfword_alignment, + HalfwordAlignmentTable<8>, + HalfwordAlignmentTable<64> +); +table_tests!( + word_alignment, + WordAlignmentTable<8>, + WordAlignmentTable<64> +); + +// Multiply overflow +table_tests!( + mulu_no_overflow, + MulUNoOverflowTable<8>, + MulUNoOverflowTable<64> +); + +// Byte manipulation +table_tests!(virtual_rev8w, VirtualRev8WTable<8>, VirtualRev8WTable<64>); + +// XOR-rotate (SHA) — 64-bit only (no XLEN=8 hypercube test) +macro_rules! 
xor_rot_tests { + ($mod:ident, $table64:ty) => { + mod $mod { + use super::*; + + #[test] + fn mle_random() { + mle_random_test::<64, Fr, $table64>(); + } + + #[test] + fn prefix_suffix() { + prefix_suffix_test::<64, Fr, $table64>(); + } + } + }; +} + +xor_rot_tests!(virtual_xor_rot_32, VirtualXORROTTable<64, 32>); +xor_rot_tests!(virtual_xor_rot_24, VirtualXORROTTable<64, 24>); +xor_rot_tests!(virtual_xor_rot_16, VirtualXORROTTable<64, 16>); +xor_rot_tests!(virtual_xor_rot_63, VirtualXORROTTable<64, 63>); +xor_rot_tests!(virtual_xor_rotw_16, VirtualXORROTWTable<64, 16>); +xor_rot_tests!(virtual_xor_rotw_12, VirtualXORROTWTable<64, 12>); +xor_rot_tests!(virtual_xor_rotw_8, VirtualXORROTWTable<64, 8>); +xor_rot_tests!(virtual_xor_rotw_7, VirtualXORROTWTable<64, 7>); diff --git a/crates/jolt-instructions/src/tables/lower_half_word.rs b/crates/jolt-instructions/src/tables/lower_half_word.rs new file mode 100644 index 000000000..9a525957f --- /dev/null +++ b/crates/jolt-instructions/src/tables/lower_half_word.rs @@ -0,0 +1,45 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; + +/// Extracts the lower half of a word. +/// For XLEN=64 this extracts the lower 32 bits; for XLEN=32, the lower 16 bits. 
+#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct LowerHalfWordTable; + +impl LookupTable for LowerHalfWordTable { + fn materialize_entry(&self, index: u128) -> u64 { + let half_word_size = XLEN / 2; + (index % (1u128 << half_word_size)) as u64 + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let half_word_size = XLEN / 2; + let mut result = F::zero(); + for i in 0..half_word_size { + result += F::from_u64(1 << (half_word_size - 1 - i)) * r[XLEN + half_word_size + i]; + } + result + } +} + +impl PrefixSuffixDecomposition for LowerHalfWordTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::LowerHalfWord] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + let [one, lower_half_word] = suffixes.try_into().unwrap(); + prefixes[Prefixes::LowerHalfWord] * one + lower_half_word + } +} diff --git a/crates/jolt-instructions/src/tables/mod.rs b/crates/jolt-instructions/src/tables/mod.rs new file mode 100644 index 000000000..1d5715b2c --- /dev/null +++ b/crates/jolt-instructions/src/tables/mod.rs @@ -0,0 +1,530 @@ +//! Lookup table definitions for Jolt instruction decomposition. +//! +//! Each instruction that participates in the sumcheck-based lookup argument +//! maps to exactly one [`LookupTableKind`]. Concrete table implementations +//! provide [`materialize_entry`](crate::LookupTable::materialize_entry) for +//! preprocessing and [`evaluate_mle`](crate::LookupTable::evaluate_mle) for +//! the sumcheck verifier. +//! +//! The prefix/suffix sparse-dense decomposition enables sub-linear MLE +//! evaluation during the sumcheck prover's inner loop. 
+ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::traits::LookupTable; + +pub mod and; +pub mod andn; +pub mod equal; +pub mod halfword_alignment; +pub mod lower_half_word; +pub mod movsign; +pub mod mulu_no_overflow; +pub mod not_equal; +pub mod or; +pub mod pow2; +pub mod pow2_w; +pub mod prefixes; +pub mod range_check; +pub mod range_check_aligned; +pub mod shift_right_bitmask; +pub mod sign_extend_half_word; +pub mod signed_greater_than_equal; +pub mod signed_less_than; +pub mod suffixes; +pub mod unsigned_greater_than_equal; +pub mod unsigned_less_than; +pub mod unsigned_less_than_equal; +pub mod upper_word; +pub mod valid_div0; +pub mod valid_signed_remainder; +pub mod valid_unsigned_remainder; +pub mod virtual_change_divisor; +pub mod virtual_change_divisor_w; +pub mod virtual_rev8w; +pub mod virtual_rotr; +pub mod virtual_rotrw; +pub mod virtual_sra; +pub mod virtual_srl; +pub mod virtual_xor_rot; +pub mod virtual_xor_rotw; +pub mod word_alignment; +pub mod xor; + +pub use prefixes::{PrefixEval, Prefixes}; +pub use suffixes::{SuffixEval, Suffixes}; + +/// Identifies a lookup table type. +/// +/// Each variant corresponds to a concrete table with its own +/// [`LookupTable`] implementation. Instructions +/// declare which table they use via [`Instruction::lookup_table()`](crate::Instruction::lookup_table). +/// +/// The enum is `#[repr(u8)]` for compact serialization and efficient +/// discriminant extraction. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[repr(u8)] +pub enum LookupTableKind { + // Arithmetic + /// Identity/range-check: extracts the lower XLEN bits. + /// Used by ADD, SUB, MUL, ADDI, JAL, and other combined-operand instructions. + RangeCheck, + /// Range check with LSB alignment (clears bit 0). Used by JALR. + RangeCheckAligned, + + // Bitwise + /// Bitwise AND. Used by AND, ANDI. + And, + /// Bitwise AND-NOT (x & !y). 
Used by ANDN (Zbb extension). + Andn, + /// Bitwise OR. Used by OR, ORI. + Or, + /// Bitwise XOR. Used by XOR, XORI. + Xor, + + // Comparison + /// Equality check: returns 1 if x == y. Used by BEQ. + Equal, + /// Not-equal: returns 1 if x != y. Used by BNE. + NotEqual, + /// Signed less-than. Used by BLT, SLT, SLTI. + SignedLessThan, + /// Unsigned less-than. Used by BLTU, SLTU, SLTIU. + UnsignedLessThan, + /// Signed greater-than-or-equal. Used by BGE. + SignedGreaterThanEqual, + /// Unsigned greater-than-or-equal. Used by BGEU. + UnsignedGreaterThanEqual, + /// Unsigned less-than-or-equal. + UnsignedLessThanEqual, + + // Word extraction + /// Extract upper XLEN bits of a 2*XLEN-bit value. Used by MULHU. + UpperWord, + /// Extract lower half-word (XLEN/2 bits). + LowerHalfWord, + /// Sign-extend half-word to full word. + SignExtendHalfWord, + + // Sign/conditional + /// Sign-bit conditional: returns all-ones if MSB set, else zero. Used by MOVSIGN. + Movsign, + + // Power of 2 + /// 2^(index mod XLEN). Used by POW2, POW2I. + Pow2, + /// 2^(index mod 32). Used by POW2W, POW2IW. + Pow2W, + + // Shift + /// Bitmask for right-shift: `((1 << (XLEN - shift)) - 1) << shift`. + ShiftRightBitmask, + /// Logical right shift (virtual decomposition). Used by SRL, SRLI. + VirtualSRL, + /// Arithmetic right shift (virtual decomposition). Used by SRA, SRAI. + VirtualSRA, + /// Rotate right. Used by ROTRI. + VirtualROTR, + /// Rotate right word (32-bit). Used by ROTRIW. + VirtualROTRW, + + // Division validation + /// Division-by-zero validity check. Used by ASSERT_VALID_DIV0. + ValidDiv0, + /// Unsigned remainder validity (remainder < divisor or divisor == 0). + ValidUnsignedRemainder, + /// Signed remainder validity. + ValidSignedRemainder, + /// Divisor transform for signed div overflow. Used by CHANGE_DIVISOR. + VirtualChangeDivisor, + /// Divisor transform (32-bit). Used by CHANGE_DIVISOR_W. 
+ VirtualChangeDivisorW, + + // Alignment + /// Halfword alignment check (divisible by 2). + HalfwordAlignment, + /// Word alignment check (divisible by 4). + WordAlignment, + + // Multiply overflow + /// Unsigned multiply no-overflow check. Used by ASSERT_MULU_NO_OVERFLOW. + MulUNoOverflow, + + // Byte manipulation + /// Byte-reverse within word. Used by REV8W. + VirtualRev8W, + + // XOR-rotate (SHA) + /// XOR then rotate right by 32 bits. + VirtualXORROT32, + /// XOR then rotate right by 24 bits. + VirtualXORROT24, + /// XOR then rotate right by 16 bits. + VirtualXORROT16, + /// XOR then rotate right by 63 bits. + VirtualXORROT63, + /// XOR then rotate right word by 16 bits. + VirtualXORROTW16, + /// XOR then rotate right word by 12 bits. + VirtualXORROTW12, + /// XOR then rotate right word by 8 bits. + VirtualXORROTW8, + /// XOR then rotate right word by 7 bits. + VirtualXORROTW7, +} + +impl LookupTableKind { + /// Total number of distinct lookup table types. + pub const COUNT: usize = 41; + + /// Returns the discriminant as a `usize`, suitable for array indexing. + #[inline] + pub fn index(self) -> usize { + self as usize + } +} + +const _: () = assert!(LookupTableKind::VirtualXORROTW7 as usize + 1 == LookupTableKind::COUNT); + +/// Prefix/suffix decomposition for sub-linear MLE evaluation. +/// +/// Each lookup table decomposes its MLE as: +/// ```text +/// table_mle(r) = Σ_i prefix_i(r_high) · suffix_i(r_low) +/// ``` +/// +/// where the sum is over a small number of prefix-suffix pairs. +/// This enables the sumcheck prover to avoid materializing the entire table. +pub trait PrefixSuffixDecomposition: crate::LookupTable + Default { + /// The suffix types used in this table's decomposition. + fn suffixes(&self) -> Vec; + + /// Recombine evaluated prefix and suffix values into the table's MLE evaluation. + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F; + + /// Generate a random lookup index for testing. 
+ /// + /// The default returns a uniform random u128. Tables with constrained input + /// domains (e.g., shift/rotate tables that expect bitmask-shaped right operands) + /// should override this to produce valid test inputs. + #[cfg(test)] + fn random_lookup_index(rng: &mut rand::rngs::StdRng) -> u128 { + rand::Rng::gen(rng) + } +} + +#[cfg(test)] +pub(crate) mod test_utils; + +#[cfg(test)] +mod lookup_table_tests; + +/// Runtime dispatch wrapper over all concrete lookup tables. +/// +/// Each variant corresponds 1:1 to a [`LookupTableKind`] and delegates to the +/// concrete ZST table type. The `XLEN` const generic selects the word size +/// (8 for tests, 64 for production). +/// +/// Construct from a [`LookupTableKind`] via [`From`]: +/// ```ignore +/// let table = LookupTables::<64>::from(LookupTableKind::And); +/// assert_eq!(table.materialize_entry(0b11), 1); +/// ``` +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum LookupTables { + RangeCheck, + RangeCheckAligned, + And, + Andn, + Or, + Xor, + Equal, + NotEqual, + SignedLessThan, + UnsignedLessThan, + SignedGreaterThanEqual, + UnsignedGreaterThanEqual, + UnsignedLessThanEqual, + UpperWord, + LowerHalfWord, + SignExtendHalfWord, + Movsign, + Pow2, + Pow2W, + ShiftRightBitmask, + VirtualSRL, + VirtualSRA, + VirtualROTR, + VirtualROTRW, + ValidDiv0, + ValidUnsignedRemainder, + ValidSignedRemainder, + VirtualChangeDivisor, + VirtualChangeDivisorW, + HalfwordAlignment, + WordAlignment, + MulUNoOverflow, + VirtualRev8W, + VirtualXORROT32, + VirtualXORROT24, + VirtualXORROT16, + VirtualXORROT63, + VirtualXORROTW16, + VirtualXORROTW12, + VirtualXORROTW8, + VirtualXORROTW7, +} + +/// Dispatches a method call to the concrete table type for each variant. +macro_rules! 
dispatch_table { + ($self:expr, |$t:ident| $body:expr) => { + match $self { + Self::RangeCheck => { + let $t = range_check::RangeCheckTable::; + $body + } + Self::RangeCheckAligned => { + let $t = range_check_aligned::RangeCheckAlignedTable::; + $body + } + Self::And => { + let $t = and::AndTable::; + $body + } + Self::Andn => { + let $t = andn::AndnTable::; + $body + } + Self::Or => { + let $t = or::OrTable::; + $body + } + Self::Xor => { + let $t = xor::XorTable::; + $body + } + Self::Equal => { + let $t = equal::EqualTable::; + $body + } + Self::NotEqual => { + let $t = not_equal::NotEqualTable::; + $body + } + Self::SignedLessThan => { + let $t = signed_less_than::SignedLessThanTable::; + $body + } + Self::UnsignedLessThan => { + let $t = unsigned_less_than::UnsignedLessThanTable::; + $body + } + Self::SignedGreaterThanEqual => { + let $t = signed_greater_than_equal::SignedGreaterThanEqualTable::; + $body + } + Self::UnsignedGreaterThanEqual => { + let $t = unsigned_greater_than_equal::UnsignedGreaterThanEqualTable::; + $body + } + Self::UnsignedLessThanEqual => { + let $t = unsigned_less_than_equal::UnsignedLessThanEqualTable::; + $body + } + Self::UpperWord => { + let $t = upper_word::UpperWordTable::; + $body + } + Self::LowerHalfWord => { + let $t = lower_half_word::LowerHalfWordTable::; + $body + } + Self::SignExtendHalfWord => { + let $t = sign_extend_half_word::SignExtendHalfWordTable::; + $body + } + Self::Movsign => { + let $t = movsign::MovsignTable::; + $body + } + Self::Pow2 => { + let $t = pow2::Pow2Table::; + $body + } + Self::Pow2W => { + let $t = pow2_w::Pow2WTable::; + $body + } + Self::ShiftRightBitmask => { + let $t = shift_right_bitmask::ShiftRightBitmaskTable::; + $body + } + Self::VirtualSRL => { + let $t = virtual_srl::VirtualSRLTable::; + $body + } + Self::VirtualSRA => { + let $t = virtual_sra::VirtualSRATable::; + $body + } + Self::VirtualROTR => { + let $t = virtual_rotr::VirtualRotrTable::; + $body + } + Self::VirtualROTRW => { + let 
$t = virtual_rotrw::VirtualRotrWTable::; + $body + } + Self::ValidDiv0 => { + let $t = valid_div0::ValidDiv0Table::; + $body + } + Self::ValidUnsignedRemainder => { + let $t = valid_unsigned_remainder::ValidUnsignedRemainderTable::; + $body + } + Self::ValidSignedRemainder => { + let $t = valid_signed_remainder::ValidSignedRemainderTable::; + $body + } + Self::VirtualChangeDivisor => { + let $t = virtual_change_divisor::VirtualChangeDivisorTable::; + $body + } + Self::VirtualChangeDivisorW => { + let $t = virtual_change_divisor_w::VirtualChangeDivisorWTable::; + $body + } + Self::HalfwordAlignment => { + let $t = halfword_alignment::HalfwordAlignmentTable::; + $body + } + Self::WordAlignment => { + let $t = word_alignment::WordAlignmentTable::; + $body + } + Self::MulUNoOverflow => { + let $t = mulu_no_overflow::MulUNoOverflowTable::; + $body + } + Self::VirtualRev8W => { + let $t = virtual_rev8w::VirtualRev8WTable::; + $body + } + Self::VirtualXORROT32 => { + let $t = virtual_xor_rot::VirtualXORROTTable::; + $body + } + Self::VirtualXORROT24 => { + let $t = virtual_xor_rot::VirtualXORROTTable::; + $body + } + Self::VirtualXORROT16 => { + let $t = virtual_xor_rot::VirtualXORROTTable::; + $body + } + Self::VirtualXORROT63 => { + let $t = virtual_xor_rot::VirtualXORROTTable::; + $body + } + Self::VirtualXORROTW16 => { + let $t = virtual_xor_rotw::VirtualXORROTWTable::; + $body + } + Self::VirtualXORROTW12 => { + let $t = virtual_xor_rotw::VirtualXORROTWTable::; + $body + } + Self::VirtualXORROTW8 => { + let $t = virtual_xor_rotw::VirtualXORROTWTable::; + $body + } + Self::VirtualXORROTW7 => { + let $t = virtual_xor_rotw::VirtualXORROTWTable::; + $body + } + } + }; +} + +/// Generates identity mappings between `LookupTableKind` and `LookupTables` variants. +macro_rules! kind_table_identity { + ($($variant:ident),* $(,)?) => { + impl LookupTables { + /// Returns the corresponding [`LookupTableKind`] identifier. 
+ #[inline] + pub fn kind(self) -> LookupTableKind { + match self { + $(Self::$variant => LookupTableKind::$variant,)* + } + } + } + + impl From for LookupTables { + #[inline] + fn from(kind: LookupTableKind) -> Self { + match kind { + $(LookupTableKind::$variant => Self::$variant,)* + } + } + } + + impl From> for LookupTableKind { + #[inline] + fn from(table: LookupTables) -> Self { + table.kind() + } + } + }; +} + +kind_table_identity! { + RangeCheck, RangeCheckAligned, + And, Andn, Or, Xor, + Equal, NotEqual, + SignedLessThan, UnsignedLessThan, SignedGreaterThanEqual, + UnsignedGreaterThanEqual, UnsignedLessThanEqual, + UpperWord, LowerHalfWord, SignExtendHalfWord, + Movsign, + Pow2, Pow2W, + ShiftRightBitmask, VirtualSRL, VirtualSRA, VirtualROTR, VirtualROTRW, + ValidDiv0, ValidUnsignedRemainder, ValidSignedRemainder, + VirtualChangeDivisor, VirtualChangeDivisorW, + HalfwordAlignment, WordAlignment, + MulUNoOverflow, + VirtualRev8W, + VirtualXORROT32, VirtualXORROT24, VirtualXORROT16, VirtualXORROT63, + VirtualXORROTW16, VirtualXORROTW12, VirtualXORROTW8, VirtualXORROTW7, +} + +impl LookupTables { + /// The suffix types used in this table's prefix/suffix decomposition. + pub fn suffixes(&self) -> Vec { + dispatch_table!(self, |t| PrefixSuffixDecomposition::::suffixes(&t)) + } + + /// Recombine evaluated prefix and suffix values into the table's MLE evaluation. 
+ pub fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + dispatch_table!(self, |t| PrefixSuffixDecomposition::::combine( + &t, prefixes, suffixes + )) + } +} + +impl LookupTable for LookupTables { + #[inline] + fn materialize_entry(&self, index: u128) -> u64 { + dispatch_table!(self, |t| t.materialize_entry(index)) + } + + #[inline] + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + dispatch_table!(self, |t| t.evaluate_mle(r)) + } +} diff --git a/crates/jolt-instructions/src/tables/movsign.rs b/crates/jolt-instructions/src/tables/movsign.rs new file mode 100644 index 000000000..20d96df1b --- /dev/null +++ b/crates/jolt-instructions/src/tables/movsign.rs @@ -0,0 +1,47 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; + +/// Returns all-ones if the MSB of the first operand is set, else zero. 
+#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct MovsignTable; + +impl LookupTable for MovsignTable { + fn materialize_entry(&self, index: u128) -> u64 { + let sign_bit_pos = 2 * XLEN - 1; + let sign_bit = 1u128 << sign_bit_pos; + if index & sign_bit != 0 { + ((1u128 << XLEN) - 1) as u64 + } else { + 0 + } + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let sign_bit = r[0]; + let ones: u64 = ((1u128 << XLEN) - 1) as u64; + sign_bit * F::from_u64(ones) + } +} + +impl PrefixSuffixDecomposition for MovsignTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + let [one] = suffixes.try_into().unwrap(); + let ones: u64 = ((1u128 << XLEN) - 1) as u64; + F::from_u64(ones) * prefixes[Prefixes::LeftOperandMsb] * one + } +} diff --git a/crates/jolt-instructions/src/tables/mulu_no_overflow.rs b/crates/jolt-instructions/src/tables/mulu_no_overflow.rs new file mode 100644 index 000000000..f4345a244 --- /dev/null +++ b/crates/jolt-instructions/src/tables/mulu_no_overflow.rs @@ -0,0 +1,43 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct MulUNoOverflowTable; + +impl LookupTable for MulUNoOverflowTable { + fn materialize_entry(&self, index: u128) -> u64 { + let upper_bits = index >> XLEN; + (upper_bits == 0) as u64 + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let mut result = F::one(); + for r_i in &r[..XLEN] { + result *= F::one() 
- *r_i; + } + result + } +} + +impl PrefixSuffixDecomposition for MulUNoOverflowTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::OverflowBitsZero] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [overflow_bits_zero] = suffixes.try_into().unwrap(); + prefixes[Prefixes::OverflowBitsZero] * overflow_bits_zero + } +} diff --git a/crates/jolt-instructions/src/tables/not_equal.rs b/crates/jolt-instructions/src/tables/not_equal.rs new file mode 100644 index 000000000..358ab0351 --- /dev/null +++ b/crates/jolt-instructions/src/tables/not_equal.rs @@ -0,0 +1,40 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::equal::EqualTable; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct NotEqualTable; + +impl LookupTable for NotEqualTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x, y) = uninterleave_bits(index); + (x != y).into() + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + F::one() - EqualTable::.evaluate_mle::(r) + } +} + +impl PrefixSuffixDecomposition for NotEqualTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::Eq] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [one, eq] = suffixes.try_into().unwrap(); + one - prefixes[Prefixes::Eq] * eq + } +} diff --git a/crates/jolt-instructions/src/tables/or.rs b/crates/jolt-instructions/src/tables/or.rs new file mode 100644 index 000000000..4e1c70262 --- /dev/null +++ 
b/crates/jolt-instructions/src/tables/or.rs @@ -0,0 +1,45 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct OrTable; + +impl LookupTable for OrTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x, y) = uninterleave_bits(index); + x | y + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let mut result = F::zero(); + for i in 0..XLEN { + let x_i = r[2 * i]; + let y_i = r[2 * i + 1]; + result += F::from_u64(1u64 << (XLEN - 1 - i)) * (x_i + y_i - x_i * y_i); + } + result + } +} + +impl PrefixSuffixDecomposition for OrTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::Or] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + let [one, or] = suffixes.try_into().unwrap(); + prefixes[Prefixes::Or] * one + or + } +} diff --git a/crates/jolt-instructions/src/tables/pow2.rs b/crates/jolt-instructions/src/tables/pow2.rs new file mode 100644 index 000000000..048710151 --- /dev/null +++ b/crates/jolt-instructions/src/tables/pow2.rs @@ -0,0 +1,43 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; + +/// Computes `2^(index % XLEN)`. 
+#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct Pow2Table; + +impl LookupTable for Pow2Table { + fn materialize_entry(&self, index: u128) -> u64 { + 1 << (index % XLEN as u128) as u64 + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let log_xlen = XLEN.trailing_zeros() as usize; + let mut result = F::one(); + for i in 0..log_xlen { + result *= F::one() + (F::from_u64((1 << (1 << i)) - 1)) * r[r.len() - i - 1]; + } + result + } +} + +impl PrefixSuffixDecomposition for Pow2Table { + fn suffixes(&self) -> Vec { + vec![Suffixes::Pow2] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + let [pow2] = suffixes.try_into().unwrap(); + prefixes[Prefixes::Pow2] * pow2 + } +} diff --git a/crates/jolt-instructions/src/tables/pow2_w.rs b/crates/jolt-instructions/src/tables/pow2_w.rs new file mode 100644 index 000000000..865947b06 --- /dev/null +++ b/crates/jolt-instructions/src/tables/pow2_w.rs @@ -0,0 +1,42 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct Pow2WTable; + +impl LookupTable for Pow2WTable { + fn materialize_entry(&self, index: u128) -> u64 { + 1 << (index % 32) as u64 + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let mut result = F::one(); + for i in 0..5 { + result *= F::one() + (F::from_u64((1 << (1 << i)) - 1)) * r[r.len() - i - 1]; + } + result + } +} + +impl PrefixSuffixDecomposition for Pow2WTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::Pow2W] + } + + fn 
combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [pow2w] = suffixes.try_into().unwrap(); + prefixes[Prefixes::Pow2W] * pow2w + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/and.rs b/crates/jolt-instructions/src/tables/prefixes/and.rs new file mode 100644 index 000000000..24d551d66 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/and.rs @@ -0,0 +1,59 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum AndPrefix {} + +impl SparseDensePrefix for AndPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + let mut result = checkpoints[Prefixes::And].unwrap_or(F::zero()); + + // AND high-order variables of x and y + if let Some(r_x) = r_x { + let y = F::from_u8(c as u8); + let shift = XLEN - 1 - j / 2; + result += F::from_u64(1 << shift) * r_x * y; + } else { + let y_msb = b.pop_msb() as u32; + let shift = XLEN - 1 - j / 2; + result += F::from_u32(c * y_msb) * F::from_u64(1 << shift); + } + // AND remaining x and y bits + let (x, y) = b.uninterleave(); + result += F::from_u64((u64::from(x) & u64::from(y)) << (suffix_len / 2)); + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let shift = XLEN - 1 - j / 2; + // checkpoint += 2^shift * r_x * r_y + let updated = + checkpoints[Prefixes::And].unwrap_or(F::zero()) + F::from_u64(1 << shift) * r_x * r_y; + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/andn.rs 
b/crates/jolt-instructions/src/tables/prefixes/andn.rs new file mode 100644 index 000000000..6cf5946cd --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/andn.rs @@ -0,0 +1,61 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum AndnPrefix {} + +impl SparseDensePrefix for AndnPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + let mut result = checkpoints[Prefixes::Andn].unwrap_or(F::zero()); + + // ANDN high-order variables: x_i * (1 - y_i) + if let Some(r_x) = r_x { + let y = F::from_u8(c as u8); + let shift = XLEN - 1 - j / 2; + result += F::from_u64(1 << shift) * r_x * (F::one() - y); + } else { + let y_msb = b.pop_msb() as u32; + let shift = XLEN - 1 - j / 2; + // c * (1 - y_msb) = c when y_msb = 0, 0 when y_msb = 1 + result += F::from_u32(c * (1 - y_msb)) * F::from_u64(1 << shift); + } + + // ANDN remaining x and y bits + let (x, y) = b.uninterleave(); + result += F::from_u64((u64::from(x) & !u64::from(y)) << (suffix_len / 2)); + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let shift = XLEN - 1 - j / 2; + // checkpoint += 2^shift * r_x * (1 - r_y) + let updated = checkpoints[Prefixes::Andn].unwrap_or(F::zero()) + + F::from_u64(1 << shift) * r_x * (F::one() - r_y); + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/change_divisor.rs b/crates/jolt-instructions/src/tables/prefixes/change_divisor.rs new file mode 100644 index 000000000..7cf24fd4d --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/change_divisor.rs @@ -0,0 +1,74 @@ 
+use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum ChangeDivisorPrefix {} + +impl SparseDensePrefix for ChangeDivisorPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let mut result = checkpoints[Prefixes::ChangeDivisor] + .unwrap_or(F::from_u64(2) - F::from_u128(1u128 << XLEN)); + if j == 0 { + let x_msb = b.pop_msb() as u32; + if x_msb == 0 { + return F::zero(); + } + let (x, y) = b.uninterleave(); + if u64::from(x) != 0 || u64::from(y) != (1u64 << y.len()) - 1 { + return F::zero(); + } + result = result.mul_u64(c as u64); + } else if let Some(r_x) = r_x { + let (x, y) = b.uninterleave(); + if u64::from(x) != 0 || u64::from(y) != (1u64 << y.len()) - 1 || c == 0 { + return F::zero(); + } + if j == 1 { + result *= (r_x) * F::from_u64(c as u64); + } else { + result *= (F::one() - r_x) * F::from_u64(c as u64); + } + } else { + let (x, y) = b.uninterleave(); + if !b.is_empty() && u64::from(x) != 0 || u64::from(y) != (1u64 << y.len()) - 1 { + return F::zero(); + } + result *= F::one() - F::from_u64(c as u64); + } + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let updated = checkpoints[Prefixes::ChangeDivisor] + .unwrap_or(F::from_u64(2) - F::from_u128(1u128 << XLEN)) + * if j == 1 { + r_x * r_y + } else { + (F::one() - r_x) * r_y + }; + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/change_divisor_w.rs b/crates/jolt-instructions/src/tables/prefixes/change_divisor_w.rs new file mode 100644 index 000000000..71a62eab6 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/change_divisor_w.rs @@ -0,0 
+1,87 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum ChangeDivisorWPrefix {} + +impl SparseDensePrefix for ChangeDivisorWPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + if j < XLEN { + return F::zero(); + } + + let mut result = if j == XLEN || j == XLEN + 1 { + F::from_u64(2) - F::from_u128(1u128 << XLEN) + } else { + checkpoints[Prefixes::ChangeDivisorW].unwrap() + }; + + if j == XLEN { + let x_msb = b.pop_msb() as u32; + if x_msb == 0 { + return F::zero(); + } + let (x, y) = b.uninterleave(); + if u64::from(x) != 0 || u64::from(y) != (1u64 << y.len()) - 1 { + return F::zero(); + } + result = result.mul_u64(c as u64); + } else if let Some(r_x) = r_x { + if j > XLEN { + let (x, y) = b.uninterleave(); + if u64::from(x) != 0 || u64::from(y) != (1u64 << y.len()) - 1 || c == 0 { + return F::zero(); + } + + if j == XLEN + 1 { + result *= (r_x) * F::from_u64(c as u64); + } else { + result *= (F::one() - r_x) * F::from_u64(c as u64); + } + } + } else if j > XLEN { + let (x, y) = b.uninterleave(); + if !b.is_empty() && u64::from(x) != 0 || u64::from(y) != (1u64 << y.len()) - 1 { + return F::zero(); + } + result *= F::one() - F::from_u64(c as u64); + } + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j < XLEN { + return Some(F::zero()).into(); + } + + let updated = if j == XLEN + 1 { + (F::from_u64(2) - F::from_u128(1u128 << XLEN)) * r_x * r_y + } else { + checkpoints[Prefixes::ChangeDivisorW].unwrap() * ((F::one() - r_x) * r_y) + }; + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/div_by_zero.rs 
b/crates/jolt-instructions/src/tables/prefixes/div_by_zero.rs new file mode 100644 index 000000000..084923711 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/div_by_zero.rs @@ -0,0 +1,57 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum DivByZeroPrefix {} + +impl SparseDensePrefix for DivByZeroPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + _: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let (divisor, quotient) = b.uninterleave(); + // If low-order bits of divisor are not 0s or low-order bits of quotient are not + // 1s, short-circuit and return 0. + if u64::from(divisor) != 0 || u64::from(quotient) != (1 << quotient.len()) - 1 { + return F::zero(); + } + + let mut result = checkpoints[Prefixes::DivByZero].unwrap_or(F::one()); + + if let Some(r_x) = r_x { + let y = F::from_u32(c); + result *= (F::one() - r_x) * y; + } else { + let x = F::from_u8(c as u8); + let y = F::from_u8(b.pop_msb()); + result *= (F::one() - x) * y; + } + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + _: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + // checkpoint *= (1 - r_x) * r_y + let updated = checkpoints[Prefixes::DivByZero].unwrap_or(F::one()) * (F::one() - r_x) * r_y; + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/eq.rs b/crates/jolt-instructions/src/tables/prefixes/eq.rs new file mode 100644 index 000000000..13b9f49bc --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/eq.rs @@ -0,0 +1,57 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum EqPrefix 
{} + +impl SparseDensePrefix for EqPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + _: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let mut result = checkpoints[Prefixes::Eq].unwrap_or(F::one()); + + // EQ high-order variables of x and y + if let Some(r_x) = r_x { + let y = F::from_u8(c as u8); + result *= r_x * y + (F::one() - r_x) * (F::one() - y); + } else { + let x = F::from_u8(c as u8); + let y_msb = F::from_u8(b.pop_msb()); + result *= x * y_msb + (F::one() - x) * (F::one() - y_msb); + } + // EQ remaining x and y bits + let (x, y) = b.uninterleave(); + if x != y { + return F::zero(); + } + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + _: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + // checkpoint *= r_x * r_y + (1 - r_x) * (1 - r_y) + let updated = checkpoints[Prefixes::Eq].unwrap_or(F::one()) + * (r_x * r_y + (F::one() - r_x) * (F::one() - r_y)); + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/left_is_zero.rs b/crates/jolt-instructions/src/tables/prefixes/left_is_zero.rs new file mode 100644 index 000000000..6c16a27df --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/left_is_zero.rs @@ -0,0 +1,55 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum LeftOperandIsZeroPrefix {} + +impl SparseDensePrefix for LeftOperandIsZeroPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + b: LookupBits, + _: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let (x, _) = b.uninterleave(); + // Short-circuit if low-order bits of `x` are not 0s + if u64::from(x) != 0 { + return F::zero(); + } + + let mut result = 
checkpoints[Prefixes::LeftOperandIsZero].unwrap_or(F::one()); + + if let Some(r_x) = r_x { + result *= F::one() - r_x; + } else { + let x = F::from_u8(c as u8); + result *= F::one() - x; + } + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + _: C, + _: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + // checkpoint *= (1 - r_x) + let updated = + checkpoints[Prefixes::LeftOperandIsZero].unwrap_or(F::one()) * (F::one() - r_x); + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/left_msb.rs b/crates/jolt-instructions/src/tables/prefixes/left_msb.rs new file mode 100644 index 000000000..52484607e --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/left_msb.rs @@ -0,0 +1,48 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum LeftMsbPrefix {} + +impl SparseDensePrefix for LeftMsbPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + _: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + if j == 0 { + F::from_u32(c) + } else if j == 1 { + r_x.unwrap().into() + } else { + checkpoints[Prefixes::LeftOperandMsb].unwrap() + } + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + _: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j == 1 { + Some(r_x.into()).into() + } else { + checkpoints[Prefixes::LeftOperandMsb].into() + } + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/left_shift.rs b/crates/jolt-instructions/src/tables/prefixes/left_shift.rs new file mode 100644 index 000000000..c02190597 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/left_shift.rs @@ -0,0 +1,64 @@ +use jolt_field::Field; + +use 
crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum LeftShiftPrefix {} + +impl SparseDensePrefix for LeftShiftPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let mut result = checkpoints[Prefixes::LeftShift].unwrap_or(F::zero()); + let mut prod_one_plus_y = checkpoints[Prefixes::LeftShiftHelper].unwrap_or(F::one()); + + if let Some(r_x) = r_x { + result += r_x + * (F::one() - F::from_u8(c as u8)) + * prod_one_plus_y + * F::from_u64(1 << (XLEN - 1 - j / 2)); + prod_one_plus_y *= F::from_u8(1 + c as u8); + } else { + let y_msb = b.pop_msb(); + result += F::from_u8(c as u8 * (1 - y_msb)) + * prod_one_plus_y + * F::from_u64(1 << (XLEN - 1 - j / 2)); + prod_one_plus_y *= F::from_u8(1 + y_msb); + } + + let (x, y) = b.uninterleave(); + let (x, y_u) = (u64::from(x), u64::from(y)); + let x = x & !y_u; + let shift = (y.leading_ones() as usize + XLEN - 1 - j / 2 - y.len()) as u32; + result += F::from_u64(x.unbounded_shl(shift)) * prod_one_plus_y; + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let mut updated = checkpoints[Prefixes::LeftShift].unwrap_or(F::zero()); + let prod_one_plus_y = checkpoints[Prefixes::LeftShiftHelper].unwrap_or(F::one()); + updated += r_x * (F::one() - r_y) * prod_one_plus_y * F::from_u64(1 << (XLEN - 1 - j / 2)); + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/left_shift_helper.rs b/crates/jolt-instructions/src/tables/prefixes/left_shift_helper.rs new file mode 100644 index 000000000..12b335de8 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/left_shift_helper.rs @@ -0,0 +1,52 @@ +use jolt_field::Field; 
+ +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum LeftShiftHelperPrefix {} + +impl SparseDensePrefix for LeftShiftHelperPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + _: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let mut result = checkpoints[Prefixes::LeftShiftHelper].unwrap_or(F::one()); + + if r_x.is_some() { + result *= F::from_u32(1 + c); + } else { + let y_msb = b.pop_msb(); + result *= F::from_u8(1 + y_msb); + } + + let (_, y) = b.uninterleave(); + result *= F::from_u32(1 << y.leading_ones()); + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + _r_x: C, + r_y: C, + _: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let mut updated = checkpoints[Prefixes::LeftShiftHelper].unwrap_or(F::one()); + updated *= F::one() + r_y; + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/left_shift_w.rs b/crates/jolt-instructions/src/tables/prefixes/left_shift_w.rs new file mode 100644 index 000000000..847bc3783 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/left_shift_w.rs @@ -0,0 +1,79 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum LeftShiftWPrefix {} + +impl SparseDensePrefix for LeftShiftWPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + if j < XLEN { + return F::zero(); + } + + let mut result = checkpoints[Prefixes::LeftShiftW].unwrap_or(F::zero()); + let mut prod_one_plus_y = checkpoints[Prefixes::LeftShiftWHelper].unwrap_or(F::one()); + + // When j >= XLEN, we're 
processing bits from XLEN/2-1 down to 0 + let bit_index = XLEN - 1 - j / 2; + + if let Some(r_x) = r_x { + result += r_x + * (F::one() - F::from_u8(c as u8)) + * prod_one_plus_y + * F::from_u64(1u64.wrapping_shl(bit_index as u32)); + prod_one_plus_y *= F::from_u8(1 + c as u8); + } else { + let y_msb = b.pop_msb(); + result += F::from_u8(c as u8 * (1 - y_msb)) + * prod_one_plus_y + * F::from_u64(1u64.wrapping_shl(bit_index as u32)); + prod_one_plus_y *= F::from_u8(1 + y_msb); + } + + let (x, y) = b.uninterleave(); + let (x, y_u) = (u64::from(x), u64::from(y)); + let x = x & !y_u; + let shift = (y.leading_ones() as usize + bit_index - y.len()) as u32; + result += F::from_u64(x.unbounded_shl(shift)) * prod_one_plus_y; + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j >= XLEN { + let mut updated = checkpoints[Prefixes::LeftShiftW].unwrap_or(F::zero()); + let prod_one_plus_y = checkpoints[Prefixes::LeftShiftWHelper].unwrap_or(F::one()); + let bit_index = XLEN - 1 - j / 2; + updated += r_x + * (F::one() - r_y) + * prod_one_plus_y + * F::from_u64(1u64.wrapping_shl(bit_index as u32)); + Some(updated).into() + } else { + Some(F::zero()).into() + } + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/left_shift_w_helper.rs b/crates/jolt-instructions/src/tables/prefixes/left_shift_w_helper.rs new file mode 100644 index 000000000..d48ae8fb6 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/left_shift_w_helper.rs @@ -0,0 +1,60 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum LeftShiftWHelperPrefix {} + +impl SparseDensePrefix for LeftShiftWHelperPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: 
LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + if j < XLEN { + return F::one(); + } + + let mut result = checkpoints[Prefixes::LeftShiftWHelper].unwrap_or(F::one()); + + if r_x.is_some() { + result *= F::from_u32(1 + c); + } else { + let y_msb = b.pop_msb(); + result *= F::from_u8(1 + y_msb); + } + + let (_, y) = b.uninterleave(); + result *= F::from_u32(1 << y.leading_ones()); + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + _r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j >= XLEN { + let mut updated = checkpoints[Prefixes::LeftShiftWHelper].unwrap_or(F::one()); + updated *= F::one() + r_y; + Some(updated).into() + } else { + Some(F::one()).into() + } + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/lower_half_word.rs b/crates/jolt-instructions/src/tables/prefixes/lower_half_word.rs new file mode 100644 index 000000000..355ea74bc --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/lower_half_word.rs @@ -0,0 +1,73 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum LowerHalfWordPrefix {} + +impl SparseDensePrefix for LowerHalfWordPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + let half_word_size = XLEN / 2; + // Ignore high-order variables (those above the half-word boundary) + if j < XLEN + half_word_size { + return F::zero(); + } + let mut result = checkpoints[Prefixes::LowerHalfWord].unwrap_or(F::zero()); + + if let Some(r_x) = r_x { + let y = F::from_u8(c as u8); + let x_shift = 2 * XLEN - j; + let y_shift = 2 * XLEN - j - 1; + result += F::from_u128(1u128 << x_shift) * 
r_x; + result += F::from_u128(1u128 << y_shift) * y; + } else { + let x = F::from_u8(c as u8); + let y_msb = b.pop_msb(); + let x_shift = 2 * XLEN - j - 1; + let y_shift = 2 * XLEN - j - 2; + result += F::from_u128(1 << x_shift) * x; + result += F::from_u128(1 << y_shift) * F::from_u8(y_msb); + } + + // Add in low-order bits from `b` + result += F::from_u128(u128::from(b) << suffix_len); + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let half_word_size = XLEN / 2; + if j < XLEN + half_word_size { + return None.into(); + } + let x_shift = 2 * XLEN - j; + let y_shift = 2 * XLEN - j - 1; + let mut updated = checkpoints[Prefixes::LowerHalfWord].unwrap_or(F::zero()); + updated += F::from_u128(1 << x_shift) * r_x; + updated += F::from_u128(1 << y_shift) * r_y; + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/lower_word.rs b/crates/jolt-instructions/src/tables/prefixes/lower_word.rs new file mode 100644 index 000000000..037dd235a --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/lower_word.rs @@ -0,0 +1,71 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum LowerWordPrefix {} + +impl SparseDensePrefix for LowerWordPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + // Ignore high-order variables + if j < XLEN { + return F::zero(); + } + let mut result = checkpoints[Prefixes::LowerWord].unwrap_or(F::zero()); + + if let Some(r_x) = r_x { + let y = F::from_u8(c as u8); + let x_shift = 2 * XLEN - j; + let y_shift = 2 * XLEN - j - 1; + result += 
F::from_u128(1u128 << x_shift) * r_x; + result += F::from_u128(1u128 << y_shift) * y; + } else { + let x = F::from_u8(c as u8); + let y_msb = b.pop_msb(); + let x_shift = 2 * XLEN - j - 1; + let y_shift = 2 * XLEN - j - 2; + result += F::from_u128(1 << x_shift) * x; + result += F::from_u128(1 << y_shift) * F::from_u8(y_msb); + } + + // Add in low-order bits from `b` + result += F::from_u128(u128::from(b) << suffix_len); + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j < XLEN { + return None.into(); + } + let x_shift = 2 * XLEN - j; + let y_shift = 2 * XLEN - j - 1; + let mut updated = checkpoints[Prefixes::LowerWord].unwrap_or(F::zero()); + updated += F::from_u128(1 << x_shift) * r_x; + updated += F::from_u128(1 << y_shift) * r_y; + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/lsb.rs b/crates/jolt-instructions/src/tables/prefixes/lsb.rs new file mode 100644 index 000000000..b8c74608c --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/lsb.rs @@ -0,0 +1,52 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, SparseDensePrefix}; + +pub enum LsbPrefix {} + +impl SparseDensePrefix for LsbPrefix { + fn prefix_mle( + _checkpoints: &[PrefixCheckpoint], + _r_x: Option, + c: u32, + b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + if j == 2 * XLEN - 1 { + // in the log(K)th round, `c` corresponds to the LSB + debug_assert_eq!(b.len(), 0); + F::from_u32(c) + } else if suffix_len == 0 { + // in the (log(K)-1)th round, the LSB of `b` is the LSB + F::from_u32(u32::from(b) & 1) + } else { + F::one() + } + } + + fn update_prefix_checkpoint( + _: &[PrefixCheckpoint], + _: C, + r_y: C, + 
j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j == 2 * XLEN - 1 { + Some(r_y.into()).into() + } else { + Some(F::one()).into() + } + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/lt.rs b/crates/jolt-instructions/src/tables/prefixes/lt.rs new file mode 100644 index 000000000..4af985abc --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/lt.rs @@ -0,0 +1,63 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum LessThanPrefix {} + +impl SparseDensePrefix for LessThanPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + _: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let mut lt = checkpoints[Prefixes::LessThan].unwrap_or(F::zero()); + let mut eq = checkpoints[Prefixes::Eq].unwrap_or(F::one()); + + if let Some(r_x) = r_x { + let c = F::from_u32(c); + lt += eq * (F::one() - r_x) * c; + let (x, y) = b.uninterleave(); + if u64::from(x) < u64::from(y) { + eq *= r_x * c + (F::one() - r_x) * (F::one() - c); + lt += eq; + } + } else { + let c = F::from_u32(c); + let y_msb = F::from_u8(b.pop_msb()); + lt += eq * (F::one() - c) * y_msb; + let (x, y) = b.uninterleave(); + if u64::from(x) < u64::from(y) { + eq *= c * y_msb + (F::one() - c) * (F::one() - y_msb); + lt += eq; + } + } + + lt + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + _: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let lt_checkpoint = checkpoints[Prefixes::LessThan].unwrap_or(F::zero()); + let eq_checkpoint = checkpoints[Prefixes::Eq].unwrap_or(F::one()); + let lt_updated = lt_checkpoint + eq_checkpoint * (F::one() - r_x) * r_y; + Some(lt_updated).into() + } +} diff --git 
a/crates/jolt-instructions/src/tables/prefixes/mod.rs b/crates/jolt-instructions/src/tables/prefixes/mod.rs new file mode 100644 index 000000000..eef21ac9d --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/mod.rs @@ -0,0 +1,366 @@ +//! Prefix polynomial evaluations for the sparse-dense decomposition. +//! +//! Each prefix captures the "contribution" of high-order bound variables +//! to a lookup table's MLE during sumcheck. Prefixes are field-valued +//! (unlike suffixes which are `u64`), and maintain checkpoints that are +//! updated every two sumcheck rounds. + +pub mod and; +pub mod andn; +pub mod change_divisor; +pub mod change_divisor_w; +pub mod div_by_zero; +pub mod eq; +pub mod left_is_zero; +pub mod left_msb; +pub mod left_shift; +pub mod left_shift_helper; +pub mod left_shift_w; +pub mod left_shift_w_helper; +pub mod lower_half_word; +pub mod lower_word; +pub mod lsb; +pub mod lt; +pub mod negative_divisor_equals_remainder; +pub mod negative_divisor_greater_than_remainder; +pub mod negative_divisor_zero_remainder; +pub mod or; +pub mod overflow_bits_zero; +pub mod positive_remainder_equals_divisor; +pub mod positive_remainder_less_than_divisor; +pub mod pow2; +pub mod pow2_w; +pub mod rev8w; +pub mod right_is_zero; +pub mod right_msb; +pub mod right_operand; +pub mod right_operand_w; +pub mod right_shift; +pub mod right_shift_w; +pub mod sign_extension; +pub mod sign_extension_right_operand; +pub mod sign_extension_upper_half; +pub mod two_lsb; +pub mod upper_word; +pub mod xor; +pub mod xor_rot; +pub mod xor_rotw; + +use jolt_field::Field; +use std::fmt::Display; +use std::ops::Index; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +/// A prefix polynomial: evaluates bound high-order variables during sumcheck. +/// +/// The challenge type `C` supports smaller-than-field challenge values +/// for performance (e.g., 128-bit challenges with a 254-bit field). 
+pub trait SparseDensePrefix: 'static + Sync { + /// Evaluate the prefix MLE incorporating the checkpoint, current variable `c`, + /// and unbound variables `b`. + /// + /// - On odd rounds (`j` odd): `r_x` is `Some(challenge)` from the previous round. + /// - On even rounds (`j` even): `r_x` is `None`; `c` is the current x-variable. + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps; + + /// Update the checkpoint after binding two variables (`r_x`, `r_y`). + /// + /// Called every two sumcheck rounds. May depend on other prefix checkpoints. + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps; +} + +/// Wrapper for prefix polynomial evaluations, used for type safety. +#[derive(Clone, Copy)] +pub struct PrefixEval(pub(crate) F); + +/// Cached prefix evaluation after each pair of address-binding rounds. +pub type PrefixCheckpoint = PrefixEval>; + +impl Display for PrefixEval { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From for PrefixEval { + fn from(value: F) -> Self { + Self(value) + } +} + +impl PrefixCheckpoint { + /// Unwrap the checkpoint, panicking if it hasn't been initialized. + pub fn unwrap(self) -> PrefixEval { + self.0.unwrap().into() + } + + /// Returns the inner value if set, or the provided default. + pub fn unwrap_or(self, default: F) -> F { + self.0.unwrap_or(default) + } +} + +impl Index for &[PrefixEval] { + type Output = F; + + fn index(&self, prefix: Prefixes) -> &Self::Output { + let index = prefix as usize; + &self.get(index).unwrap().0 + } +} + +/// All prefix types used by Jolt's lookup tables. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum Prefixes { + LowerWord, + LowerHalfWord, + UpperWord, + Eq, + And, + Andn, + Or, + Xor, + LessThan, + LeftOperandIsZero, + RightOperandIsZero, + LeftOperandMsb, + RightOperandMsb, + DivByZero, + PositiveRemainderEqualsDivisor, + PositiveRemainderLessThanDivisor, + NegativeDivisorZeroRemainder, + NegativeDivisorEqualsRemainder, + NegativeDivisorGreaterThanRemainder, + Lsb, + Pow2, + Pow2W, + Rev8W, + RightShift, + SignExtension, + LeftShift, + LeftShiftHelper, + TwoLsb, + SignExtensionUpperHalf, + ChangeDivisor, + ChangeDivisorW, + RightOperand, + RightOperandW, + SignExtensionRightOperand, + RightShiftW, + LeftShiftWHelper, + LeftShiftW, + OverflowBitsZero, + XorRot16, + XorRot24, + XorRot32, + XorRot63, + XorRotW7, + XorRotW8, + XorRotW12, + XorRotW16, +} + +/// Total number of prefix variants. +pub const NUM_PREFIXES: usize = 46; + +const _: () = assert!(Prefixes::XorRotW16 as usize + 1 == NUM_PREFIXES); + +/// All prefix variants in discriminant order. 
+pub const ALL_PREFIXES: [Prefixes; NUM_PREFIXES] = [ + Prefixes::LowerWord, + Prefixes::LowerHalfWord, + Prefixes::UpperWord, + Prefixes::Eq, + Prefixes::And, + Prefixes::Andn, + Prefixes::Or, + Prefixes::Xor, + Prefixes::LessThan, + Prefixes::LeftOperandIsZero, + Prefixes::RightOperandIsZero, + Prefixes::LeftOperandMsb, + Prefixes::RightOperandMsb, + Prefixes::DivByZero, + Prefixes::PositiveRemainderEqualsDivisor, + Prefixes::PositiveRemainderLessThanDivisor, + Prefixes::NegativeDivisorZeroRemainder, + Prefixes::NegativeDivisorEqualsRemainder, + Prefixes::NegativeDivisorGreaterThanRemainder, + Prefixes::Lsb, + Prefixes::Pow2, + Prefixes::Pow2W, + Prefixes::Rev8W, + Prefixes::RightShift, + Prefixes::SignExtension, + Prefixes::LeftShift, + Prefixes::LeftShiftHelper, + Prefixes::TwoLsb, + Prefixes::SignExtensionUpperHalf, + Prefixes::ChangeDivisor, + Prefixes::ChangeDivisorW, + Prefixes::RightOperand, + Prefixes::RightOperandW, + Prefixes::SignExtensionRightOperand, + Prefixes::RightShiftW, + Prefixes::LeftShiftWHelper, + Prefixes::LeftShiftW, + Prefixes::OverflowBitsZero, + Prefixes::XorRot16, + Prefixes::XorRot24, + Prefixes::XorRot32, + Prefixes::XorRot63, + Prefixes::XorRotW7, + Prefixes::XorRotW8, + Prefixes::XorRotW12, + Prefixes::XorRotW16, +]; + +/// Dispatches a `SparseDensePrefix` method call to the concrete type for each `Prefixes` variant. +macro_rules! dispatch_prefix { + ($self:expr, $method:ident, $($args:expr),* $(,)?) 
=> { + match $self { + Prefixes::LowerWord => lower_word::LowerWordPrefix::::$method($($args),*), + Prefixes::LowerHalfWord => lower_half_word::LowerHalfWordPrefix::::$method($($args),*), + Prefixes::UpperWord => upper_word::UpperWordPrefix::::$method($($args),*), + Prefixes::Eq => eq::EqPrefix::$method($($args),*), + Prefixes::And => and::AndPrefix::::$method($($args),*), + Prefixes::Andn => andn::AndnPrefix::::$method($($args),*), + Prefixes::Or => or::OrPrefix::::$method($($args),*), + Prefixes::Xor => xor::XorPrefix::::$method($($args),*), + Prefixes::LessThan => lt::LessThanPrefix::$method($($args),*), + Prefixes::LeftOperandIsZero => left_is_zero::LeftOperandIsZeroPrefix::$method($($args),*), + Prefixes::RightOperandIsZero => right_is_zero::RightOperandIsZeroPrefix::$method($($args),*), + Prefixes::LeftOperandMsb => left_msb::LeftMsbPrefix::$method($($args),*), + Prefixes::RightOperandMsb => right_msb::RightMsbPrefix::$method($($args),*), + Prefixes::DivByZero => div_by_zero::DivByZeroPrefix::$method($($args),*), + Prefixes::PositiveRemainderEqualsDivisor => positive_remainder_equals_divisor::PositiveRemainderEqualsDivisorPrefix::$method($($args),*), + Prefixes::PositiveRemainderLessThanDivisor => positive_remainder_less_than_divisor::PositiveRemainderLessThanDivisorPrefix::$method($($args),*), + Prefixes::NegativeDivisorZeroRemainder => negative_divisor_zero_remainder::NegativeDivisorZeroRemainderPrefix::$method($($args),*), + Prefixes::NegativeDivisorEqualsRemainder => negative_divisor_equals_remainder::NegativeDivisorEqualsRemainderPrefix::$method($($args),*), + Prefixes::NegativeDivisorGreaterThanRemainder => negative_divisor_greater_than_remainder::NegativeDivisorGreaterThanRemainderPrefix::$method($($args),*), + Prefixes::Lsb => lsb::LsbPrefix::::$method($($args),*), + Prefixes::Pow2 => pow2::Pow2Prefix::::$method($($args),*), + Prefixes::Pow2W => pow2_w::Pow2WPrefix::::$method($($args),*), + Prefixes::Rev8W => rev8w::Rev8WPrefix::$method($($args),*), + 
Prefixes::RightShift => right_shift::RightShiftPrefix::$method($($args),*), + Prefixes::SignExtension => sign_extension::SignExtensionPrefix::::$method($($args),*), + Prefixes::LeftShift => left_shift::LeftShiftPrefix::::$method($($args),*), + Prefixes::LeftShiftHelper => left_shift_helper::LeftShiftHelperPrefix::$method($($args),*), + Prefixes::TwoLsb => two_lsb::TwoLsbPrefix::::$method($($args),*), + Prefixes::SignExtensionUpperHalf => sign_extension_upper_half::SignExtensionUpperHalfPrefix::::$method($($args),*), + Prefixes::ChangeDivisor => change_divisor::ChangeDivisorPrefix::::$method($($args),*), + Prefixes::ChangeDivisorW => change_divisor_w::ChangeDivisorWPrefix::::$method($($args),*), + Prefixes::RightOperand => right_operand::RightOperandPrefix::::$method($($args),*), + Prefixes::RightOperandW => right_operand_w::RightOperandWPrefix::::$method($($args),*), + Prefixes::SignExtensionRightOperand => sign_extension_right_operand::SignExtensionRightOperandPrefix::::$method($($args),*), + Prefixes::RightShiftW => right_shift_w::RightShiftWPrefix::::$method($($args),*), + Prefixes::LeftShiftWHelper => left_shift_w_helper::LeftShiftWHelperPrefix::::$method($($args),*), + Prefixes::LeftShiftW => left_shift_w::LeftShiftWPrefix::::$method($($args),*), + Prefixes::OverflowBitsZero => overflow_bits_zero::OverflowBitsZeroPrefix::::$method($($args),*), + Prefixes::XorRot16 => xor_rot::XorRotPrefix::::$method($($args),*), + Prefixes::XorRot24 => xor_rot::XorRotPrefix::::$method($($args),*), + Prefixes::XorRot32 => xor_rot::XorRotPrefix::::$method($($args),*), + Prefixes::XorRot63 => xor_rot::XorRotPrefix::::$method($($args),*), + Prefixes::XorRotW7 => xor_rotw::XorRotWPrefix::::$method($($args),*), + Prefixes::XorRotW8 => xor_rotw::XorRotWPrefix::::$method($($args),*), + Prefixes::XorRotW12 => xor_rotw::XorRotWPrefix::::$method($($args),*), + Prefixes::XorRotW16 => xor_rotw::XorRotWPrefix::::$method($($args),*), + } + }; +} + +impl Prefixes { + /// Evaluate the prefix 
MLE for this variant. + pub fn prefix_mle( + &self, + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + b: LookupBits, + j: usize, + ) -> PrefixEval + where + C: ChallengeOps, + F: Field + FieldOps, + { + PrefixEval(dispatch_prefix!( + self, + prefix_mle, + checkpoints, + r_x, + c, + b, + j + )) + } + + /// Update the checkpoint for this prefix variant. + fn update_prefix_checkpoint( + &self, + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: Field + FieldOps, + { + dispatch_prefix!( + self, + update_prefix_checkpoint, + checkpoints, + r_x, + r_y, + j, + suffix_len + ) + } + + /// Update all prefix checkpoints after binding two variables. + pub fn update_checkpoints( + checkpoints: &mut [PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + suffix_len: usize, + ) where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(checkpoints.len(), NUM_PREFIXES); + let previous_checkpoints: Vec<_> = checkpoints.to_vec(); + for (index, checkpoint) in checkpoints.iter_mut().enumerate() { + let prefix = ALL_PREFIXES[index]; + *checkpoint = prefix.update_prefix_checkpoint::( + &previous_checkpoints, + r_x, + r_y, + j, + suffix_len, + ); + } + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/negative_divisor_equals_remainder.rs b/crates/jolt-instructions/src/tables/prefixes/negative_divisor_equals_remainder.rs new file mode 100644 index 000000000..ca6651a04 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/negative_divisor_equals_remainder.rs @@ -0,0 +1,93 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum NegativeDivisorEqualsRemainderPrefix {} + +impl SparseDensePrefix for NegativeDivisorEqualsRemainderPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut 
b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + if j == 0 { + let divisor_sign = F::from_u8(b.pop_msb()); + let (remainder, divisor) = b.uninterleave(); + if u64::from(remainder) != u64::from(divisor) { + return F::zero(); + } + // `c` is the sign "bit" of the remainder. + // This prefix handles the case where both remainder and + // divisor are negative, i.e. their sign bits are one. + return F::from_u32(c) * divisor_sign; + } + if j == 1 { + let (remainder, divisor) = b.uninterleave(); + if u64::from(remainder) != u64::from(divisor) { + return F::zero(); + } + // `r_x` is the sign "bit" of the remainder. + // `c` is the sign "bit" of the divisor. + // This prefix handles the case where both remainder and + // divisor are negative, i.e. their sign bits are one. + return r_x.unwrap() * F::from_u32(c); + } + + let negative_divisor_equals_remainder = + checkpoints[Prefixes::NegativeDivisorEqualsRemainder].unwrap(); + + if let Some(r_x) = r_x { + let (remainder, divisor) = b.uninterleave(); + // Short-circuit if low-order bits of remainder and divisor are not equal + if remainder != divisor { + return F::zero(); + } + let y = F::from_u32(c); + negative_divisor_equals_remainder * (r_x * y + (F::one() - r_x) * (F::one() - y)) + } else { + let y_msb = F::from_u8(b.pop_msb()); + let (remainder, divisor) = b.uninterleave(); + // Short-circuit if low-order bits of remainder and divisor are not equal + if remainder != divisor { + return F::zero(); + } + let x = F::from_u32(c); + negative_divisor_equals_remainder * (x * y_msb + (F::one() - x) * (F::one() - y_msb)) + } + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j == 1 { + // `r_x` is the sign bit of the remainder + // `r_y` is the sign bit of the divisor + // This prefix handles the case where both remainder and + // divisor are 
negative. + return Some(r_x * r_y).into(); + } + + let mut negative_divisor_equals_remainder = + checkpoints[Prefixes::NegativeDivisorEqualsRemainder].unwrap(); + // checkpoint *= EQ(r_x, r_y) + negative_divisor_equals_remainder *= r_x * r_y + (F::one() - r_x) * (F::one() - r_y); + Some(negative_divisor_equals_remainder).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/negative_divisor_greater_than_remainder.rs b/crates/jolt-instructions/src/tables/prefixes/negative_divisor_greater_than_remainder.rs new file mode 100644 index 000000000..e519a41ef --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/negative_divisor_greater_than_remainder.rs @@ -0,0 +1,125 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum NegativeDivisorGreaterThanRemainderPrefix {} + +impl SparseDensePrefix for NegativeDivisorGreaterThanRemainderPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + if j == 0 { + let divisor_sign = F::from_u8(b.pop_msb()); + let (remainder, divisor) = b.uninterleave(); + if u64::from(remainder) <= u64::from(divisor) { + return F::zero(); + } + // `c` is the sign "bit" of the remainder. + // This prefix handles the case where both remainder and + // divisor are negative, i.e. their sign bits are one. + return F::from_u32(c) * divisor_sign; + } + if j == 1 { + let (remainder, divisor) = b.uninterleave(); + if u64::from(remainder) <= u64::from(divisor) { + return F::zero(); + } + // `r_x` is the sign "bit" of the remainder. + // `c` is the sign "bit" of the divisor. + // This prefix handles the case where both remainder and + // divisor are negative, i.e. their sign bits are one. 
+ return r_x.unwrap() * F::from_u32(c); + } + + let mut gt = checkpoints[Prefixes::NegativeDivisorGreaterThanRemainder].unwrap(); + let mut eq = checkpoints[Prefixes::NegativeDivisorEqualsRemainder].unwrap(); + + // For j=2 and j=3, the two checkpoints are the same (they both store isNegative(divisor)) + // so to avoid double-counting we multiply `gt` by x * (1 - y) instead of adding + // eq * x * (1 - y) as we do in subsequent rounds. + if j == 2 { + let c = F::from_u32(c); + let y_msb = F::from_u8(b.pop_msb()); + let (x, y) = b.uninterleave(); + gt *= c * (F::one() - y_msb); + if u64::from(x) > u64::from(y) { + eq *= c * y_msb + (F::one() - c) * (F::one() - y_msb); + gt += eq; + } + return gt; + } + if j == 3 { + let r_x = r_x.unwrap(); + let c = F::from_u32(c); + let (x, y) = b.uninterleave(); + gt *= r_x * (F::one() - c); + if u64::from(x) > u64::from(y) { + eq *= r_x * c + (F::one() - r_x) * (F::one() - c); + gt += eq; + } + return gt; + } + + if let Some(r_x) = r_x { + let c = F::from_u32(c); + gt += eq * r_x * (F::one() - c); + let (x, y) = b.uninterleave(); + if u64::from(x) > u64::from(y) { + eq *= r_x * c + (F::one() - r_x) * (F::one() - c); + gt += eq; + } + } else { + let c = F::from_u32(c); + let y_msb = F::from_u8(b.pop_msb()); + gt += eq * c * (F::one() - y_msb); + let (x, y) = b.uninterleave(); + if u64::from(x) > u64::from(y) { + eq *= c * y_msb + (F::one() - c) * (F::one() - y_msb); + gt += eq; + } + } + + gt + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j == 1 { + // `r_x` is the sign bit of the remainder + // `r_y` is the sign bit of the divisor + // This prefix handles the case where both remainder and + // divisor are negative. 
+ return Some(r_x * r_y).into(); + } + + let gt_checkpoint = checkpoints[Prefixes::NegativeDivisorGreaterThanRemainder].unwrap(); + let eq_checkpoint = checkpoints[Prefixes::NegativeDivisorEqualsRemainder].unwrap(); + + if j == 3 { + return Some(gt_checkpoint * r_x * (F::one() - r_y)).into(); + } + + let gt_updated = gt_checkpoint + eq_checkpoint * r_x * (F::one() - r_y); + Some(gt_updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/negative_divisor_zero_remainder.rs b/crates/jolt-instructions/src/tables/prefixes/negative_divisor_zero_remainder.rs new file mode 100644 index 000000000..7d9afe5c3 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/negative_divisor_zero_remainder.rs @@ -0,0 +1,92 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum NegativeDivisorZeroRemainderPrefix {} + +impl SparseDensePrefix for NegativeDivisorZeroRemainderPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + if j == 0 { + let divisor_sign = F::from_u8(b.pop_msb()); + let (remainder, _) = b.uninterleave(); + if u64::from(remainder) != 0 { + return F::zero(); + } + // `c` is the sign "bit" of the remainder. + // This prefix handles the case where the remainder is zero + // and the divisor is negative. + return (F::one() - F::from_u32(c)) * divisor_sign; + } + if j == 1 { + let (remainder, _) = b.uninterleave(); + if u64::from(remainder) != 0 { + return F::zero(); + } + // `r_x` is the sign "bit" of the remainder. + // `c` is the sign "bit" of the divisor. + // This prefix handles the case where the remainder is zero + // and the divisor is negative. 
+ return (F::one() - r_x.unwrap()) * F::from_u32(c); + } + + let negative_divisor_zero_remainder = + checkpoints[Prefixes::NegativeDivisorZeroRemainder].unwrap(); + + if let Some(r_x) = r_x { + let (remainder, _) = b.uninterleave(); + // Short-circuit if low-order bits of remainder are not 0s + if u64::from(remainder) != 0 { + return F::zero(); + } + + negative_divisor_zero_remainder * (F::one() - r_x) + } else { + let _ = b.pop_msb(); + let (remainder, _) = b.uninterleave(); + // Short-circuit if low-order bits of remainder are not 0s + if u64::from(remainder) != 0 { + return F::zero(); + } + + negative_divisor_zero_remainder * (F::one() - F::from_u32(c)) + } + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j == 1 { + // `r_x` is the sign bit of the remainder + // `r_y` is the sign bit of the divisor + // This prefix handles the case where the remainder is zero + // and the divisor is negative. 
+ return Some((F::one() - r_x) * r_y).into(); + } + + let mut negative_divisor_zero_remainder = + checkpoints[Prefixes::NegativeDivisorZeroRemainder].unwrap(); + negative_divisor_zero_remainder *= F::one() - r_x; + Some(negative_divisor_zero_remainder).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/or.rs b/crates/jolt-instructions/src/tables/prefixes/or.rs new file mode 100644 index 000000000..5f145eb16 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/or.rs @@ -0,0 +1,59 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum OrPrefix {} + +impl SparseDensePrefix for OrPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + let mut result = checkpoints[Prefixes::Or].unwrap_or(F::zero()); + + // OR high-order variables of x and y + if let Some(r_x) = r_x { + let y = F::from_u8(c as u8); + let shift = XLEN - 1 - j / 2; + result += F::from_u64(1 << shift) * (r_x + y - (r_x * y)); + } else { + let y_msb = b.pop_msb() as u32; + let shift = XLEN - 1 - j / 2; + result += F::from_u32(c + y_msb - c * y_msb) * F::from_u64(1 << shift); + } + // OR remaining x and y bits + let (x, y) = b.uninterleave(); + result += F::from_u64((u64::from(x) | u64::from(y)) << (suffix_len / 2)); + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let shift = XLEN - 1 - j / 2; + // checkpoint += 2^shift * (r_x + r_y - r_x * r_y) + let updated = checkpoints[Prefixes::Or].unwrap_or(F::zero()) + + F::from_u64(1 << shift) * (r_x + r_y - r_x * r_y); + Some(updated).into() + } +} diff --git 
a/crates/jolt-instructions/src/tables/prefixes/overflow_bits_zero.rs b/crates/jolt-instructions/src/tables/prefixes/overflow_bits_zero.rs new file mode 100644 index 000000000..267a9cc64 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/overflow_bits_zero.rs @@ -0,0 +1,65 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum OverflowBitsZeroPrefix {} + +impl SparseDensePrefix for OverflowBitsZeroPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + if j >= 128 - XLEN { + return checkpoints[Prefixes::OverflowBitsZero].unwrap_or(F::one()); + } + + let mut result = checkpoints[Prefixes::OverflowBitsZero].unwrap_or(F::one()); + + if let Some(r_x) = r_x { + let y = F::from_u8(c as u8); + result *= (F::one() - r_x) * (F::one() - y); + } else { + let x = F::from_u32(c); + let y = F::from_u8(b.pop_msb()); + result *= (F::one() - x) * (F::one() - y); + } + + let rest = u128::from(b); + let temp = F::from_u64((((rest << suffix_len) >> XLEN) == 0) as u64); + result *= temp; + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j >= 128 - XLEN { + return checkpoints[Prefixes::OverflowBitsZero].into(); + } + let updated = checkpoints[Prefixes::OverflowBitsZero].unwrap_or(F::one()) + * (F::one() - r_x) + * (F::one() - r_y); + + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/positive_remainder_equals_divisor.rs b/crates/jolt-instructions/src/tables/prefixes/positive_remainder_equals_divisor.rs new file mode 100644 index 000000000..4c884e92e --- /dev/null +++ 
b/crates/jolt-instructions/src/tables/prefixes/positive_remainder_equals_divisor.rs @@ -0,0 +1,92 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum PositiveRemainderEqualsDivisorPrefix {} + +impl SparseDensePrefix for PositiveRemainderEqualsDivisorPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + if j == 0 { + let divisor_sign = F::from_u8(b.pop_msb()); + let (remainder, divisor) = b.uninterleave(); + if u64::from(remainder) != u64::from(divisor) { + return F::zero(); + } + // `c` is the sign "bit" of the remainder. + // This prefix handles the case where both remainder and divisor + // are positive, i.e. their sign bits are zero. + return (F::one() - F::from_u32(c)) * (F::one() - divisor_sign); + } + if j == 1 { + let (remainder, divisor) = b.uninterleave(); + if u64::from(remainder) != u64::from(divisor) { + return F::zero(); + } + // `r_x` is the sign "bit" of the remainder. + // `c` is the sign "bit" of the divisor. + // This prefix handles the case where both remainder and divisor + // are positive, i.e. their sign bits are zero. 
+ return (F::one() - r_x.unwrap()) * (F::one() - F::from_u32(c)); + } + + let positive_remainder_equals_divisor = + checkpoints[Prefixes::PositiveRemainderEqualsDivisor].unwrap(); + + if let Some(r_x) = r_x { + let (remainder, divisor) = b.uninterleave(); + // Short-circuit if low-order bits of remainder and divisor are not equal + if u64::from(remainder) != u64::from(divisor) { + return F::zero(); + } + let y = F::from_u32(c); + positive_remainder_equals_divisor * (r_x * y + (F::one() - r_x) * (F::one() - y)) + } else { + let y = F::from_u8(b.pop_msb()); + let (remainder, divisor) = b.uninterleave(); + // Short-circuit if low-order bits of remainder and divisor are not equal + if u64::from(remainder) != u64::from(divisor) { + return F::zero(); + } + let x = F::from_u32(c); + positive_remainder_equals_divisor * (x * y + (F::one() - x) * (F::one() - y)) + } + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j == 1 { + // `r_x` is the sign bit of the remainder + // `r_y` is the sign bit of the divisor + // This prefix handles the case where both remainder and divisor + // are positive, i.e. their sign bits are zero. 
+ return Some((F::one() - r_x) * (F::one() - r_y)).into(); + } + + let mut positive_remainder_equals_divisor = + checkpoints[Prefixes::PositiveRemainderEqualsDivisor].unwrap(); + positive_remainder_equals_divisor *= r_x * r_y + (F::one() - r_x) * (F::one() - r_y); + Some(positive_remainder_equals_divisor).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/positive_remainder_less_than_divisor.rs b/crates/jolt-instructions/src/tables/prefixes/positive_remainder_less_than_divisor.rs new file mode 100644 index 000000000..6b4a94770 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/positive_remainder_less_than_divisor.rs @@ -0,0 +1,125 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum PositiveRemainderLessThanDivisorPrefix {} + +impl SparseDensePrefix for PositiveRemainderLessThanDivisorPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + if j == 0 { + let divisor_sign = F::from_u8(b.pop_msb()); + let (remainder, divisor) = b.uninterleave(); + if u64::from(remainder) >= u64::from(divisor) { + return F::zero(); + } + // `c` is the sign "bit" of the remainder. + // This prefix handles the case where both remainder and divisor + // are positive, i.e. their sign bits are zero. + return (F::one() - F::from_u32(c)) * (F::one() - divisor_sign); + } + if j == 1 { + let (remainder, divisor) = b.uninterleave(); + if u64::from(remainder) >= u64::from(divisor) { + return F::zero(); + } + // `r_x` is the sign "bit" of the remainder. + // `c` is the sign "bit" of the divisor. + // This prefix handles the case where both remainder and divisor + // are positive, i.e. their sign bits are zero. 
+ return (F::one() - r_x.unwrap()) * (F::one() - F::from_u32(c)); + } + + let mut lt = checkpoints[Prefixes::PositiveRemainderLessThanDivisor].unwrap(); + let mut eq = checkpoints[Prefixes::PositiveRemainderEqualsDivisor].unwrap(); + + // For j=2 and j=3, the two checkpoints are the same (they both store isNegative(divisor)) + // so to avoid double-counting we multiply `lt` by (1 - x) * y instead of adding + // eq * (1 - x) * y as we do in subsequent rounds. + if j == 2 { + let c = F::from_u32(c); + let y_msb = F::from_u8(b.pop_msb()); + let (x, y) = b.uninterleave(); + lt *= (F::one() - c) * y_msb; + if u64::from(x) < u64::from(y) { + eq *= c * y_msb + (F::one() - c) * (F::one() - y_msb); + lt += eq; + } + return lt; + } + if j == 3 { + let r_x = r_x.unwrap(); + let c = F::from_u32(c); + let (x, y) = b.uninterleave(); + lt *= (F::one() - r_x) * c; + if u64::from(x) < u64::from(y) { + eq *= r_x * c + (F::one() - r_x) * (F::one() - c); + lt += eq; + } + return lt; + } + + if let Some(r_x) = r_x { + let c = F::from_u32(c); + lt += eq * (F::one() - r_x) * c; + let (x, y) = b.uninterleave(); + if u64::from(x) < u64::from(y) { + eq *= r_x * c + (F::one() - r_x) * (F::one() - c); + lt += eq; + } + } else { + let c = F::from_u32(c); + let y_msb = F::from_u8(b.pop_msb()); + lt += eq * (F::one() - c) * y_msb; + let (x, y) = b.uninterleave(); + if u64::from(x) < u64::from(y) { + eq *= c * y_msb + (F::one() - c) * (F::one() - y_msb); + lt += eq; + } + } + + lt + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j == 1 { + // `r_x` is the sign bit of the remainder + // `r_y` is the sign bit of the divisor + // This prefix handles the case where both remainder and divisor + // are positive, i.e. their sign bits are zero. 
+ return Some((F::one() - r_x) * (F::one() - r_y)).into(); + } + + let lt_checkpoint = checkpoints[Prefixes::PositiveRemainderLessThanDivisor].unwrap(); + let eq_checkpoint = checkpoints[Prefixes::PositiveRemainderEqualsDivisor].unwrap(); + + if j == 3 { + return Some(lt_checkpoint * (F::one() - r_x) * r_y).into(); + } + + let lt_updated = lt_checkpoint + eq_checkpoint * (F::one() - r_x) * r_y; + Some(lt_updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/pow2.rs b/crates/jolt-instructions/src/tables/prefixes/pow2.rs new file mode 100644 index 000000000..547d60dee --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/pow2.rs @@ -0,0 +1,81 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum Pow2Prefix {} + +impl SparseDensePrefix for Pow2Prefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + if suffix_len != 0 { + return F::one(); + } + + if b.len() >= XLEN.trailing_zeros() as usize { + return F::from_u64(1 << (b & (XLEN - 1))); + } + + let mut result = F::from_u64(1 << (b & (XLEN - 1))); + let mut num_bits = b.len(); + let mut shift = 1u64 << (1u64 << num_bits); + result *= F::from_u64(1 + (shift - 1) * c as u64); + + if b.len() == XLEN.trailing_zeros() as usize - 1 { + return result; + } + + num_bits += 1; + shift = 1 << (1 << num_bits); + if let Some(r_x) = r_x { + result *= F::one() + F::from_u64(shift - 1) * r_x; + } + + result *= checkpoints[Prefixes::Pow2].unwrap_or(F::one()); + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if suffix_len != 0 { + return 
Some(F::one()).into(); + } + + if j == 2 * XLEN - XLEN.trailing_zeros() as usize { + let shift = 1 << (XLEN / 2); + return Some(F::one() + F::from_u64(shift - 1) * r_y).into(); + } + + if 2 * XLEN - j < XLEN.trailing_zeros() as usize { + let mut checkpoint = checkpoints[Prefixes::Pow2].unwrap(); + let shift = 1 << (1 << (2 * XLEN - j)); + checkpoint *= F::one() + F::from_u64(shift - 1) * r_x; + let shift = 1 << (1 << (2 * XLEN - j - 1)); + checkpoint *= F::one() + F::from_u64(shift - 1) * r_y; + return Some(checkpoint).into(); + } + + Some(F::one()).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/pow2_w.rs b/crates/jolt-instructions/src/tables/prefixes/pow2_w.rs new file mode 100644 index 000000000..8dcde648f --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/pow2_w.rs @@ -0,0 +1,82 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum Pow2WPrefix {} + +impl SparseDensePrefix for Pow2WPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + if suffix_len != 0 { + return F::one(); + } + + // Shift amount is the last 5 bits of b (for modulo 32) + if b.len() >= 5 { + return F::from_u64(1 << (b & (0b11111))); + } + + let mut result = F::from_u64(1 << (b & (0b11111))); + let mut num_bits = b.len(); + let mut shift = 1u64 << (1u64 << num_bits); + result *= F::from_u64(1 + (shift - 1) * c as u64); + + if b.len() == 4 { + return result; + } + + num_bits += 1; + shift = 1 << (1 << num_bits); + if let Some(r_x) = r_x { + result *= F::one() + F::from_u64(shift - 1) * r_x; + } + + result *= checkpoints[Prefixes::Pow2W].unwrap_or(F::one()); + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + 
j: usize, + suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if suffix_len != 0 { + return Some(F::one()).into(); + } + + if j == 2 * XLEN - 5 { + let shift = 1 << 16; + return Some(F::one() + F::from_u64(shift - 1) * r_y).into(); + } + + if 2 * XLEN - j < 5 { + let mut checkpoint = checkpoints[Prefixes::Pow2W].unwrap(); + let shift = 1 << (1 << (2 * XLEN - j)); + checkpoint *= F::one() + F::from_u64(shift - 1) * r_x; + let shift = 1 << (1 << (2 * XLEN - j - 1)); + checkpoint *= F::one() + F::from_u64(shift - 1) * r_y; + return Some(checkpoint).into(); + } + + Some(F::one()).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/rev8w.rs b/crates/jolt-instructions/src/tables/prefixes/rev8w.rs new file mode 100644 index 000000000..537c55457 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/rev8w.rs @@ -0,0 +1,84 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; +use crate::tables::virtual_rev8w::rev8w; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +const XLEN: usize = 64; + +pub enum Rev8WPrefix {} + +impl SparseDensePrefix for Rev8WPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + // The prefix-suffix MLE is only defined on the 64 LSBs. + let suffix_n_bits = suffix_len; + if suffix_n_bits >= 64 { + return F::zero(); + } + + let mut eval = checkpoints[Prefixes::Rev8W].unwrap_or(F::zero()); + + // Add `c` contribution. + let c_bit_index = suffix_n_bits + b.len(); + if c_bit_index < 64 { + let shift = rev8w(1 << c_bit_index).trailing_zeros(); + eval += F::from_u128((c as u128) << shift); + } + + // Add `r_x` contribution. 
+ let r_x_bit_index = c_bit_index + 1; + if r_x_bit_index < 64 { + if let Some(r_x) = r_x { + let rev_pow2 = rev8w(1 << r_x_bit_index); + eval += r_x.into().mul_u64(rev_pow2); + } + } + + // Add `b` contribution. + let b_contribution = rev8w(u64::from(b) << suffix_n_bits); + eval += F::from_u64(b_contribution); + + eval + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let mut res = checkpoints[Prefixes::Rev8W].unwrap_or(F::zero()); + + let r_y_bit_index = 2 * XLEN - 1 - j; + if r_y_bit_index < 64 { + let rev_pow2 = rev8w(1 << r_y_bit_index); + res += r_y.into().mul_u64(rev_pow2); + } + + let r_x_bit_index = r_y_bit_index + 1; + if r_x_bit_index < 64 { + let rev_pow2 = rev8w(1 << r_x_bit_index); + res += r_x.into().mul_u64(rev_pow2); + } + + Some(res).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/right_is_zero.rs b/crates/jolt-instructions/src/tables/prefixes/right_is_zero.rs new file mode 100644 index 000000000..33c73ce35 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/right_is_zero.rs @@ -0,0 +1,56 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum RightOperandIsZeroPrefix {} + +impl SparseDensePrefix for RightOperandIsZeroPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + _: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let (_, y) = b.uninterleave(); + // Short-circuit if low-order bits of `y` are not 0s + if u64::from(y) != 0 { + return F::zero(); + } + + let mut result = checkpoints[Prefixes::RightOperandIsZero].unwrap_or(F::one()); + + if r_x.is_some() { + let y = F::from_u8(c as u8); + result *= F::one() - y; + } else { + let y = F::from_u8(b.pop_msb()); 
+ result *= F::one() - y; + } + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + _: C, + r_y: C, + _: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + // checkpoint *= (1 - r_y) + let updated = + checkpoints[Prefixes::RightOperandIsZero].unwrap_or(F::one()) * (F::one() - r_y); + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/right_msb.rs b/crates/jolt-instructions/src/tables/prefixes/right_msb.rs new file mode 100644 index 000000000..4dbdbf3d4 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/right_msb.rs @@ -0,0 +1,49 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum RightMsbPrefix {} + +impl SparseDensePrefix for RightMsbPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + _: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + if j == 0 { + let y_msb = b.pop_msb(); + F::from_u8(y_msb) + } else if j == 1 { + F::from_u32(c) + } else { + checkpoints[Prefixes::RightOperandMsb].unwrap() + } + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + _: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j == 1 { + Some(r_y.into()).into() + } else { + checkpoints[Prefixes::RightOperandMsb].into() + } + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/right_operand.rs b/crates/jolt-instructions/src/tables/prefixes/right_operand.rs new file mode 100644 index 000000000..75d1a7999 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/right_operand.rs @@ -0,0 +1,53 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, 
Prefixes, SparseDensePrefix}; + +pub enum RightOperandPrefix {} + +impl SparseDensePrefix for RightOperandPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + _r_x: Option, + c: u32, + b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + let mut result = checkpoints[Prefixes::RightOperand].unwrap_or(F::zero()); + + if j % 2 == 1 { + // c is of the right operand + let shift = XLEN - 1 - j / 2; + result += F::from_u128((c as u128) << shift); + } + + let (_x, y) = b.uninterleave(); + result += F::from_u128(u128::from(y) << (suffix_len / 2)); + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + _r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let shift = XLEN - 1 - j / 2; + let updated = checkpoints[Prefixes::RightOperand].unwrap_or(F::zero()) + + (F::from_u64(1 << shift) * r_y); + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/right_operand_w.rs b/crates/jolt-instructions/src/tables/prefixes/right_operand_w.rs new file mode 100644 index 000000000..a0295d00d --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/right_operand_w.rs @@ -0,0 +1,59 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum RightOperandWPrefix {} + +impl SparseDensePrefix for RightOperandWPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + _r_x: Option, + c: u32, + b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + let mut result = checkpoints[Prefixes::RightOperandW].unwrap_or(F::zero()); + + if j % 2 == 1 && j > XLEN { + // c is of the right operand + let shift = XLEN - 1 - j / 2; + result += F::from_u128((c as u128) << 
shift); + } + + if suffix_len < XLEN { + let (_x, y) = b.uninterleave(); + result += F::from_u128(u128::from(y) << (suffix_len / 2)); + } + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + _r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j > XLEN { + let shift = XLEN - 1 - j / 2; + let updated = checkpoints[Prefixes::RightOperandW].unwrap_or(F::zero()) + + (F::from_u64(1 << shift) * r_y); + Some(updated).into() + } else { + checkpoints[Prefixes::RightOperandW].into() + } + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/right_shift.rs b/crates/jolt-instructions/src/tables/prefixes/right_shift.rs new file mode 100644 index 000000000..de9784992 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/right_shift.rs @@ -0,0 +1,54 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum RightShiftPrefix {} + +impl SparseDensePrefix for RightShiftPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + _: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let mut result = checkpoints[Prefixes::RightShift].unwrap_or(F::zero()); + if let Some(r_x) = r_x { + result *= F::from_u32(1 + c); + result += r_x * F::from_u32(c); + } else { + let y_msb = b.pop_msb(); + result *= F::from_u8(1 + y_msb); + result += F::from_u8(c as u8 * y_msb); + } + let (x, y) = b.uninterleave(); + result *= F::from_u32(1 << y.leading_ones()); + result += F::from_u32(u32::from(x) >> y.trailing_zeros()); + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + _: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let mut updated = 
checkpoints[Prefixes::RightShift].unwrap_or(F::zero()); + updated *= F::one() + r_y; + updated += r_x * r_y; + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/right_shift_w.rs b/crates/jolt-instructions/src/tables/prefixes/right_shift_w.rs new file mode 100644 index 000000000..1d10557c4 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/right_shift_w.rs @@ -0,0 +1,62 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum RightShiftWPrefix {} + +impl SparseDensePrefix for RightShiftWPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + if j < XLEN { + return F::zero(); + } + + let mut result = checkpoints[Prefixes::RightShiftW].unwrap_or(F::zero()); + if let Some(r_x) = r_x { + result *= F::from_u32(1 + c); + result += r_x * F::from_u32(c); + } else { + let y_msb = b.pop_msb(); + result *= F::from_u8(1 + y_msb); + result += F::from_u8(c as u8 * y_msb); + } + let (x, y) = b.uninterleave(); + result *= F::from_u32(1 << y.leading_ones()); + result += F::from_u32(u32::from(x) >> y.trailing_zeros()); + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j >= XLEN { + let mut updated = checkpoints[Prefixes::RightShiftW].unwrap_or(F::zero()); + updated *= F::one() + r_y; + updated += r_x * r_y; + Some(updated).into() + } else { + Some(F::zero()).into() + } + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/sign_extension.rs b/crates/jolt-instructions/src/tables/prefixes/sign_extension.rs new file mode 100644 index 000000000..ba0c48b4d --- /dev/null +++ 
b/crates/jolt-instructions/src/tables/prefixes/sign_extension.rs @@ -0,0 +1,96 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum SignExtensionPrefix {} + +impl SparseDensePrefix for SignExtensionPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + if j == 0 { + let sign_bit = F::from_u8(c as u8); + if sign_bit.is_zero() { + return F::zero(); + } + let _ = b.pop_msb(); + let (_, mut y) = b.uninterleave(); + let mut result = F::zero(); + let mut index = 1; + for _ in 0..y.len() { + let y_i = y.pop_msb() as u64; + result += F::from_u64((1 - y_i) << index); + index += 1; + } + return result * sign_bit; + } + if j == 1 { + let sign_bit = r_x.unwrap(); + let (_, mut y) = b.uninterleave(); + let mut result = F::zero(); + let mut index = 1; + for _ in 0..y.len() { + let y_i = y.pop_msb() as u64; + result += F::from_u64((1 - y_i) << index); + index += 1; + } + return result * sign_bit; + } + + let sign_bit = checkpoints[Prefixes::LeftOperandMsb].unwrap(); + let mut result = checkpoints[Prefixes::SignExtension].unwrap_or(F::zero()); + + if r_x.is_some() { + result += F::from_u64(1 << (j / 2)) * (F::one() - F::from_u32(c)); + } else { + let y_msb = b.pop_msb(); + if y_msb == 0 { + result += F::from_u64(1 << (j / 2)); + } + } + let (_, mut y) = b.uninterleave(); + let mut index = j / 2; + for _ in 0..y.len() { + index += 1; + if y.pop_msb() == 0 { + result += F::from_u64(1 << index); + } + } + + result *= sign_bit; + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + _: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j == 1 { + return None.into(); + } + let mut updated = 
checkpoints[Prefixes::SignExtension].unwrap_or(F::zero()); + updated += F::from_u64(1 << (j / 2)) * (F::one() - r_y); + if j == 2 * XLEN - 1 { + updated *= checkpoints[Prefixes::LeftOperandMsb].unwrap(); + } + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/sign_extension_right_operand.rs b/crates/jolt-instructions/src/tables/prefixes/sign_extension_right_operand.rs new file mode 100644 index 000000000..e6ac37966 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/sign_extension_right_operand.rs @@ -0,0 +1,63 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum SignExtensionRightOperandPrefix {} + +impl SparseDensePrefix for SignExtensionRightOperandPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + _r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + + // If suffix handles sign extension, return 1 + if suffix_len >= XLEN { + return F::one(); + } + + if j == XLEN { + // Sign bit is msb of b + let sign_bit = b.pop_msb(); + F::from_u128((1u128 << XLEN) - (1u128 << (XLEN / 2))).mul_u64(sign_bit as u64) + } else if j == XLEN + 1 { + // Sign bit is in c + F::from_u128((1u128 << XLEN) - (1u128 << (XLEN / 2))).mul_u64(c as u64) + } else if j >= XLEN + 2 { + // Sign bit has been processed, use checkpoint + checkpoints[Prefixes::SignExtensionRightOperand].unwrap_or(F::zero()) + } else { + unreachable!("This case should never happen if our prefixes start at half_word_size"); + } + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + _r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j == XLEN + 1 { + // Sign bit is in r_y + let value = F::from_u128((1u128 << XLEN) - (1u128 
<< (XLEN / 2))) * r_y; + Some(value).into() + } else { + checkpoints[Prefixes::SignExtensionRightOperand].into() + } + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/sign_extension_upper_half.rs b/crates/jolt-instructions/src/tables/prefixes/sign_extension_upper_half.rs new file mode 100644 index 000000000..27425e4f5 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/sign_extension_upper_half.rs @@ -0,0 +1,60 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum SignExtensionUpperHalfPrefix {} + +impl SparseDensePrefix for SignExtensionUpperHalfPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + let half_word_size = XLEN / 2; + + if suffix_len >= half_word_size { + return F::one(); + } + + if j == XLEN + half_word_size { + F::from_u128(((1u128 << (half_word_size)) - 1) << (half_word_size)).mul_u64(c as u64) + } else if j == XLEN + half_word_size + 1 { + F::from_u128(((1u128 << (half_word_size)) - 1) << (half_word_size)) * r_x.unwrap() + } else if j > XLEN + half_word_size + 1 { + checkpoints[Prefixes::SignExtensionUpperHalf].unwrap_or(F::zero()) + } else { + unreachable!("This case should never happen if our prefixes start at half_word_size"); + } + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + _r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let half_word_size = XLEN / 2; + + if j == XLEN + half_word_size + 1 { + let value = F::from_u128(((1u128 << (half_word_size)) - 1) << (half_word_size)) * r_x; + Some(value).into() + } else { + checkpoints[Prefixes::SignExtensionUpperHalf].into() + } + } +} diff --git 
a/crates/jolt-instructions/src/tables/prefixes/two_lsb.rs b/crates/jolt-instructions/src/tables/prefixes/two_lsb.rs new file mode 100644 index 000000000..b6763e373 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/two_lsb.rs @@ -0,0 +1,62 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum TwoLsbPrefix {} + +impl SparseDensePrefix for TwoLsbPrefix { + fn prefix_mle( + _: &[PrefixCheckpoint], + r_x: Option, + c: u32, + b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + if j == 2 * XLEN - 1 { + // in the log(K)th round, `c` corresponds to bit 0 + // and `r_x` corresponds to bit 1 + debug_assert_eq!(b.len(), 0); + (F::one() - F::from_u32(c)) * (F::one() - r_x.unwrap()) + } else if j == 2 * XLEN - 2 { + // in the (log(K)-1)th round, `c` corresponds to bit 1 + debug_assert_eq!(b.len(), 1); + let bit0 = u32::from(b) & 1; + let bit1 = c; + (F::one() - F::from_u32(bit0)) * (F::one() - F::from_u32(bit1)) + } else if suffix_len == 0 { + // in the (log(K)-2)th round, the two LSBs of `b` are the two LSBs + match u32::from(b) & 0b11 { + 0b00 => F::one(), + _ => F::zero(), + } + } else { + F::one() + } + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j == 2 * XLEN - 1 { + Some((F::one() - r_x) * (F::one() - r_y)).into() + } else { + checkpoints[Prefixes::TwoLsb].into() + } + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/upper_word.rs b/crates/jolt-instructions/src/tables/prefixes/upper_word.rs new file mode 100644 index 000000000..bf032126a --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/upper_word.rs @@ -0,0 +1,77 @@ +use jolt_field::Field; + +use 
crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +#[derive(Default)] +pub struct UpperWordPrefix; + +impl SparseDensePrefix for UpperWordPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + let mut result = checkpoints[Prefixes::UpperWord].unwrap_or(F::zero()); + // Ignore low-order variables + if j >= XLEN { + return result; + } + + if let Some(r_x) = r_x { + let y = F::from_u8(c as u8); + let x_shift = XLEN - j; + let y_shift = XLEN - j - 1; + result += F::from_u64(1 << x_shift) * r_x; + result += F::from_u64(1 << y_shift) * y; + } else { + let x = F::from_u8(c as u8); + let y_msb = b.pop_msb(); + let x_shift = XLEN - j - 1; + let y_shift = XLEN - j - 2; + result += F::from_u64(1 << x_shift) * x; + result += F::from_u64(1 << y_shift) * F::from_u8(y_msb); + } + + // Add in bits of `b` that fall in upper word + if suffix_len > XLEN { + result += F::from_u64(u64::from(b) << (suffix_len - XLEN)); + } else { + let (b_high, _) = b.split(XLEN - suffix_len); + result += F::from_u64(u64::from(b_high)); + } + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j >= XLEN { + return checkpoints[Prefixes::UpperWord].into(); + } + let x_shift = XLEN - j; + let y_shift = XLEN - j - 1; + let updated = checkpoints[Prefixes::UpperWord].unwrap_or(F::zero()) + + F::from_u64(1 << x_shift) * r_x + + F::from_u64(1 << y_shift) * r_y; + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/xor.rs b/crates/jolt-instructions/src/tables/prefixes/xor.rs new file mode 100644 index 000000000..b4853155c --- /dev/null +++ 
b/crates/jolt-instructions/src/tables/prefixes/xor.rs @@ -0,0 +1,60 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum XorPrefix {} + +impl SparseDensePrefix for XorPrefix { + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + let mut result = checkpoints[Prefixes::Xor].unwrap_or(F::zero()); + + // XOR high-order variables of x and y + if let Some(r_x) = r_x { + let y = F::from_u8(c as u8); + let shift = XLEN - 1 - j / 2; + result += F::from_u64(1 << shift) * ((F::one() - r_x) * y + r_x * (F::one() - y)); + } else { + let x = F::from_u32(c); + let y_msb = F::from_u8(b.pop_msb()); + let shift = XLEN - 1 - j / 2; + result += F::from_u64(1 << shift) * ((F::one() - x) * y_msb + x * (F::one() - y_msb)); + } + // XOR remaining x and y bits + let (x, y) = b.uninterleave(); + result += F::from_u64((u64::from(x) ^ u64::from(y)) << (suffix_len / 2)); + + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let shift = XLEN - 1 - j / 2; + // checkpoint += 2^shift * ((1 - r_x) * r_y + r_x * (1 - r_y)) + let updated = checkpoints[Prefixes::Xor].unwrap_or(F::zero()) + + F::from_u64(1 << shift) * ((F::one() - r_x) * r_y + r_x * (F::one() - r_y)); + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/xor_rot.rs b/crates/jolt-instructions/src/tables/prefixes/xor_rot.rs new file mode 100644 index 000000000..28d2afd7a --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/xor_rot.rs @@ -0,0 +1,92 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use 
crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum XorRotPrefix {} + +impl SparseDensePrefix + for XorRotPrefix +{ + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + let prefix_idx = match ROTATION { + 16 => Prefixes::XorRot16, + 24 => Prefixes::XorRot24, + 32 => Prefixes::XorRot32, + 63 => Prefixes::XorRot63, + _ => unreachable!(), + }; + let mut result = checkpoints[prefix_idx].unwrap_or(F::zero()); + + if let Some(r_x) = r_x { + let y = F::from_u8(c as u8); + let xor_bit = (F::one() - r_x) * y + r_x * (F::one() - y); + + let original_pos = j / 2; + let rotated_pos = (original_pos + ROTATION as usize) % XLEN; + let shift = XLEN - 1 - rotated_pos; + + result += F::from_u64(1 << shift) * xor_bit; + } else { + let x = F::from_u32(c); + let y_msb = F::from_u8(b.pop_msb()); + let xor_bit = (F::one() - x) * y_msb + x * (F::one() - y_msb); + + let original_pos = j / 2; + let rotated_pos = (original_pos + ROTATION as usize) % XLEN; + let shift = XLEN - 1 - rotated_pos; + + result += F::from_u64(1 << shift) * xor_bit; + } + + let (x, y) = b.uninterleave(); + + let shift = if suffix_len as i32 / 2 - ROTATION as i32 >= 0 { + suffix_len / 2 - ROTATION as usize + } else { + (XLEN as i32 + (suffix_len as i32 / 2 - ROTATION as i32)) as usize + }; + + result += F::from_u64((u64::from(x) ^ u64::from(y)).rotate_left(shift as u32)); + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + let prefix_idx = match ROTATION { + 16 => Prefixes::XorRot16, + 24 => Prefixes::XorRot24, + 32 => Prefixes::XorRot32, + 63 => Prefixes::XorRot63, + _ => unreachable!(), + }; + let original_pos = j / 2; + let rotated_pos = 
(original_pos + ROTATION as usize) % XLEN; + let shift = XLEN - 1 - rotated_pos; + let updated = checkpoints[prefix_idx].unwrap_or(F::zero()) + + F::from_u64(1 << shift) * ((F::one() - r_x) * r_y + r_x * (F::one() - r_y)); + Some(updated).into() + } +} diff --git a/crates/jolt-instructions/src/tables/prefixes/xor_rotw.rs b/crates/jolt-instructions/src/tables/prefixes/xor_rotw.rs new file mode 100644 index 000000000..ae05353c9 --- /dev/null +++ b/crates/jolt-instructions/src/tables/prefixes/xor_rotw.rs @@ -0,0 +1,100 @@ +use jolt_field::Field; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; + +use super::{PrefixCheckpoint, Prefixes, SparseDensePrefix}; + +pub enum XorRotWPrefix {} + +impl SparseDensePrefix + for XorRotWPrefix +{ + fn prefix_mle( + checkpoints: &[PrefixCheckpoint], + r_x: Option, + c: u32, + mut b: LookupBits, + j: usize, + ) -> F + where + C: ChallengeOps, + F: FieldOps, + { + let suffix_len = 2 * XLEN - j - b.len() - 1; + if j < XLEN { + return F::zero(); + } + + let prefix_idx = match ROTATION { + 7 => Prefixes::XorRotW7, + 8 => Prefixes::XorRotW8, + 12 => Prefixes::XorRotW12, + 16 => Prefixes::XorRotW16, + _ => unreachable!(), + }; + let mut result = checkpoints[prefix_idx].unwrap_or(F::zero()); + + if let Some(r_x) = r_x { + let y = F::from_u8(c as u8); + let xor_bit = (F::one() - r_x) * y + r_x * (F::one() - y); + let position = (j - XLEN) / 2; + let mut rotated_position = (position + ROTATION as usize) % 32; + rotated_position = 32 - 1 - rotated_position; + result += F::from_u64(1 << rotated_position) * xor_bit; + } else { + let x = F::from_u32(c); + let y_msb = F::from_u8(b.pop_msb()); + let xor_bit = (F::one() - x) * y_msb + x * (F::one() - y_msb); + let position = (j - XLEN) / 2; + let mut rotated_position = (position + ROTATION as usize) % 32; + rotated_position = 32 - 1 - rotated_position; + result += F::from_u64(1 << rotated_position) * xor_bit; + } + + let (x, y) = b.uninterleave(); + let x_32 
= u64::from(x) as u32; + let y_32 = u64::from(y) as u32; + let xor_result = x_32 ^ y_32; + + let shift = if suffix_len as i32 / 2 - ROTATION as i32 >= 0 { + suffix_len / 2 - ROTATION as usize + } else { + (32_i32 + (suffix_len as i32 / 2 - ROTATION as i32)) as usize + }; + + let shifted = xor_result.rotate_left(shift as u32); + result += F::from_u32(shifted); + result + } + + fn update_prefix_checkpoint( + checkpoints: &[PrefixCheckpoint], + r_x: C, + r_y: C, + j: usize, + _suffix_len: usize, + ) -> PrefixCheckpoint + where + C: ChallengeOps, + F: FieldOps, + { + if j >= XLEN { + let prefix_idx = match ROTATION { + 7 => Prefixes::XorRotW7, + 8 => Prefixes::XorRotW8, + 12 => Prefixes::XorRotW12, + 16 => Prefixes::XorRotW16, + _ => unreachable!(), + }; + let original_pos = (j - XLEN) / 2; + let rotated_pos = (original_pos + ROTATION as usize) % 32; + let shift = 32 - 1 - rotated_pos; + let updated = checkpoints[prefix_idx].unwrap_or(F::zero()) + + F::from_u64(1 << shift) * ((F::one() - r_x) * r_y + r_x * (F::one() - r_y)); + Some(updated).into() + } else { + Some(F::zero()).into() + } + } +} diff --git a/crates/jolt-instructions/src/tables/range_check.rs b/crates/jolt-instructions/src/tables/range_check.rs new file mode 100644 index 000000000..c69f90987 --- /dev/null +++ b/crates/jolt-instructions/src/tables/range_check.rs @@ -0,0 +1,46 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct RangeCheckTable; + +impl LookupTable for RangeCheckTable { + fn materialize_entry(&self, index: u128) -> u64 { + if XLEN == 64 { + index as u64 + } else { + (index % (1u128 << XLEN)) as u64 + } + } + + fn evaluate_mle(&self, r: &[C]) -> F + 
where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let mut result = F::zero(); + for i in 0..XLEN { + let shift = XLEN - 1 - i; + result += F::from_u128(1u128 << shift) * r[XLEN + i]; + } + result + } +} + +impl PrefixSuffixDecomposition for RangeCheckTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::LowerWord] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + let [one, lower_word] = suffixes.try_into().unwrap(); + prefixes[Prefixes::LowerWord] * one + lower_word + } +} diff --git a/crates/jolt-instructions/src/tables/range_check_aligned.rs b/crates/jolt-instructions/src/tables/range_check_aligned.rs new file mode 100644 index 000000000..a3b4651ce --- /dev/null +++ b/crates/jolt-instructions/src/tables/range_check_aligned.rs @@ -0,0 +1,49 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct RangeCheckAlignedTable; + +impl LookupTable for RangeCheckAlignedTable { + fn materialize_entry(&self, index: u128) -> u64 { + if XLEN == 64 { + (index as u64) & !1 + } else { + ((index % (1u128 << XLEN)) as u64) & !1 + } + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let mut result = F::zero(); + // Skip the LSB (last bit position) + for i in 0..XLEN - 1 { + let shift = XLEN - 1 - i; + result += F::from_u128(1u128 << shift) * r[XLEN + i]; + } + result + } +} + +impl PrefixSuffixDecomposition for RangeCheckAlignedTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::LowerWord, Suffixes::Lsb] + } + + fn combine(&self, prefixes: 
&[PrefixEval], suffixes: &[SuffixEval]) -> F { + let [one, lower_word, lsb] = suffixes.try_into().unwrap(); + let lower_word_contribution = prefixes[Prefixes::LowerWord] * one + lower_word; + let lsb_contribution = prefixes[Prefixes::Lsb] * lsb; + lower_word_contribution - lsb_contribution + } +} diff --git a/crates/jolt-instructions/src/tables/shift_right_bitmask.rs b/crates/jolt-instructions/src/tables/shift_right_bitmask.rs new file mode 100644 index 000000000..ee7dedb6f --- /dev/null +++ b/crates/jolt-instructions/src/tables/shift_right_bitmask.rs @@ -0,0 +1,57 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct ShiftRightBitmaskTable; + +impl LookupTable for ShiftRightBitmaskTable { + fn materialize_entry(&self, index: u128) -> u64 { + let shift = (index % XLEN as u128) as usize; + let ones = ((1u128 << (XLEN - shift)) - 1) as u64; + ones << shift + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let log_w = XLEN.trailing_zeros() as usize; + let r = &r[r.len() - log_w..]; + + let mut dp = vec![F::zero(); 1 << log_w]; + for (s, dp_s) in dp.iter_mut().enumerate().take(XLEN) { + let bitmask = ((1u128 << (XLEN - s)) - 1) << s; + let mut eq_val = F::one(); + for i in 0..log_w { + let bit = (s >> i) & 1; + eq_val *= if bit == 0 { + F::one() - r[log_w - i - 1] + } else { + r[log_w - i - 1].into() + }; + } + *dp_s = F::from_u128(bitmask) * eq_val; + } + dp.into_iter().sum() + } +} + +impl PrefixSuffixDecomposition for ShiftRightBitmaskTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::Pow2] + } + + fn combine(&self, 
prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [one, pow2] = suffixes.try_into().unwrap(); + F::from_u128(1 << XLEN) * one - prefixes[Prefixes::Pow2] * pow2 + } +} diff --git a/crates/jolt-instructions/src/tables/sign_extend_half_word.rs b/crates/jolt-instructions/src/tables/sign_extend_half_word.rs new file mode 100644 index 000000000..fe06a9fa0 --- /dev/null +++ b/crates/jolt-instructions/src/tables/sign_extend_half_word.rs @@ -0,0 +1,67 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; + +/// Sign-extends the lower half of a word to the full word width. +/// For XLEN=64, sign-extends a 32-bit value to 64 bits. +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct SignExtendHalfWordTable; + +impl LookupTable for SignExtendHalfWordTable { + fn materialize_entry(&self, index: u128) -> u64 { + let half_word_size = XLEN / 2; + let lower_half = (index % (1u128 << half_word_size)) as u64; + let sign_bit = (lower_half >> (half_word_size - 1)) & 1; + + if sign_bit == 1 { + lower_half | (((1u64 << half_word_size) - 1) << half_word_size) + } else { + lower_half + } + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let half_word_size = XLEN / 2; + + let mut lower_half = F::zero(); + for i in 0..half_word_size { + lower_half += F::from_u64(1 << (half_word_size - 1 - i)) * r[XLEN + half_word_size + i]; + } + + let sign_bit = r[XLEN + half_word_size]; + + let mut upper_half = F::zero(); + for i in 0..half_word_size { + upper_half += F::from_u64(1 << (half_word_size - 1 - i)) * sign_bit; + } + + lower_half + upper_half * 
F::from_u64(1 << half_word_size) + } +} + +impl PrefixSuffixDecomposition for SignExtendHalfWordTable { + fn suffixes(&self) -> Vec { + vec![ + Suffixes::One, + Suffixes::LowerHalfWord, + Suffixes::SignExtensionUpperHalf, + ] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + let [one, lower_half_word, sign_extension_upper_half] = suffixes.try_into().unwrap(); + prefixes[Prefixes::LowerHalfWord] * one + + lower_half_word + + prefixes[Prefixes::SignExtensionUpperHalf] * sign_extension_upper_half + } +} diff --git a/crates/jolt-instructions/src/tables/signed_greater_than_equal.rs b/crates/jolt-instructions/src/tables/signed_greater_than_equal.rs new file mode 100644 index 000000000..3c6fd7b76 --- /dev/null +++ b/crates/jolt-instructions/src/tables/signed_greater_than_equal.rs @@ -0,0 +1,50 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::signed_less_than::SignedLessThanTable; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct SignedGreaterThanEqualTable; + +impl LookupTable for SignedGreaterThanEqualTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x, y) = uninterleave_bits(index); + match XLEN { + #[cfg(test)] + 8 => (x as i8 >= y as i8).into(), + 32 => (x as i32 >= y as i32).into(), + 64 => (x as i64 >= y as i64).into(), + _ => panic!("{XLEN}-bit word size is unsupported"), + } + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + F::one() - SignedLessThanTable::.evaluate_mle(r) + } +} + +impl PrefixSuffixDecomposition for SignedGreaterThanEqualTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::LessThan] + } 
+ + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [one, less_than] = suffixes.try_into().unwrap(); + // 1 - LT(x, y) = 1 - (isNegative(x) && isPositive(y)) - LTU(x, y) + one + prefixes[Prefixes::RightOperandMsb] * one + - prefixes[Prefixes::LeftOperandMsb] * one + - prefixes[Prefixes::LessThan] * one + - prefixes[Prefixes::Eq] * less_than + } +} diff --git a/crates/jolt-instructions/src/tables/signed_less_than.rs b/crates/jolt-instructions/src/tables/signed_less_than.rs new file mode 100644 index 000000000..07bed8e1c --- /dev/null +++ b/crates/jolt-instructions/src/tables/signed_less_than.rs @@ -0,0 +1,59 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct SignedLessThanTable; + +impl LookupTable for SignedLessThanTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x, y) = uninterleave_bits(index); + match XLEN { + #[cfg(test)] + 8 => ((x as i8) < y as i8).into(), + 32 => ((x as i32) < y as i32).into(), + 64 => ((x as i64) < y as i64).into(), + _ => panic!("{XLEN}-bit word size is unsupported"), + } + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + let x_sign = r[0]; + let y_sign = r[1]; + + let mut lt = F::zero(); + let mut eq = F::one(); + for i in 0..XLEN { + let x_i = r[2 * i]; + let y_i = r[2 * i + 1]; + lt += (F::one() - x_i) * y_i * eq; + eq *= x_i * y_i + (F::one() - x_i) * (F::one() - y_i); + } + + x_sign - y_sign + lt + } +} + +impl PrefixSuffixDecomposition for SignedLessThanTable { + fn suffixes(&self) -> Vec { + 
vec![Suffixes::One, Suffixes::LessThan] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [one, less_than] = suffixes.try_into().unwrap(); + prefixes[Prefixes::LeftOperandMsb] * one - prefixes[Prefixes::RightOperandMsb] * one + + prefixes[Prefixes::LessThan] * one + + prefixes[Prefixes::Eq] * less_than + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/and.rs b/crates/jolt-instructions/src/tables/suffixes/and.rs new file mode 100644 index 000000000..a85210e8e --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/and.rs @@ -0,0 +1,11 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum AndSuffix {} + +impl SparseDenseSuffix for AndSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (x, y) = b.uninterleave(); + u64::from(x) & u64::from(y) + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/change_divisor.rs b/crates/jolt-instructions/src/tables/suffixes/change_divisor.rs new file mode 100644 index 000000000..449db3969 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/change_divisor.rs @@ -0,0 +1,11 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum ChangeDivisorSuffix {} + +impl SparseDenseSuffix for ChangeDivisorSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (x, y) = b.uninterleave(); + (((1u64 << y.len()) - 1 == u64::from(y)) && u64::from(x) == 0).into() + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/change_divisor_w.rs b/crates/jolt-instructions/src/tables/suffixes/change_divisor_w.rs new file mode 100644 index 000000000..b5ac83a51 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/change_divisor_w.rs @@ -0,0 +1,13 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum ChangeDivisorWSuffix {} + +impl SparseDenseSuffix for ChangeDivisorWSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + 
let (x, y) = b.uninterleave(); + let y_len = y.len().min(XLEN / 2); + let (x, y) = (u64::from(x) as u32 as u64, u64::from(y) as u32 as u64); + (((1u64 << y_len) - 1 == y) && x == 0).into() + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/div_by_zero.rs b/crates/jolt-instructions/src/tables/suffixes/div_by_zero.rs new file mode 100644 index 000000000..68384e20e --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/div_by_zero.rs @@ -0,0 +1,14 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +/// 1 if divisor (x) is all zeros AND quotient (y) is all ones; 0 otherwise. +pub enum DivByZeroSuffix {} + +impl SparseDenseSuffix for DivByZeroSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (divisor, quotient) = b.uninterleave(); + let divisor_is_zero = u64::from(divisor) == 0; + let quotient_is_all_ones = u64::from(quotient) == (1 << quotient.len()) - 1; + (divisor_is_zero && quotient_is_all_ones).into() + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/eq.rs b/crates/jolt-instructions/src/tables/suffixes/eq.rs new file mode 100644 index 000000000..0870caf28 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/eq.rs @@ -0,0 +1,11 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum EqSuffix {} + +impl SparseDenseSuffix for EqSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (x, y) = b.uninterleave(); + (x == y).into() + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/gt.rs b/crates/jolt-instructions/src/tables/suffixes/gt.rs new file mode 100644 index 000000000..0b350048f --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/gt.rs @@ -0,0 +1,11 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum GreaterThanSuffix {} + +impl SparseDenseSuffix for GreaterThanSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (x, y) = b.uninterleave(); + (u64::from(x) > u64::from(y)).into() + } +} diff --git 
a/crates/jolt-instructions/src/tables/suffixes/left_is_zero.rs b/crates/jolt-instructions/src/tables/suffixes/left_is_zero.rs new file mode 100644 index 000000000..c76396762 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/left_is_zero.rs @@ -0,0 +1,11 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum LeftOperandIsZeroSuffix {} + +impl SparseDenseSuffix for LeftOperandIsZeroSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (x, _) = b.uninterleave(); + (u64::from(x) == 0).into() + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/left_shift.rs b/crates/jolt-instructions/src/tables/suffixes/left_shift.rs new file mode 100644 index 000000000..c67fa7946 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/left_shift.rs @@ -0,0 +1,14 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +/// Left-shifts x by the number of leading 1s in y, masking out matched bits. +pub enum LeftShiftSuffix {} + +impl SparseDenseSuffix for LeftShiftSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (x, y) = b.uninterleave(); + let (x, y_u) = (u64::from(x), u64::from(y)); + let x = x & !y_u; + x.unbounded_shl(y.leading_ones()) + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/left_shift_w.rs b/crates/jolt-instructions/src/tables/suffixes/left_shift_w.rs new file mode 100644 index 000000000..b92bfe3a1 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/left_shift_w.rs @@ -0,0 +1,15 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +/// Left-shift W variant: processes lower 32 bits. 
+pub enum LeftShiftWSuffix {} + +impl SparseDenseSuffix for LeftShiftWSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (x, y) = b.uninterleave(); + let y = LookupBits::new(u128::from(y), y.len().min(XLEN / 2)); + let (x, y_u) = (u64::from(x) as u32, u32::from(y)); + let x = x & !y_u; + x.unbounded_shl(y.leading_ones()) as u64 + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/left_shift_w_helper.rs b/crates/jolt-instructions/src/tables/suffixes/left_shift_w_helper.rs new file mode 100644 index 000000000..f2dad1c64 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/left_shift_w_helper.rs @@ -0,0 +1,12 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +/// 2^(y.leading_ones()) truncated to 32 bits. +pub enum LeftShiftWHelperSuffix {} + +impl SparseDenseSuffix for LeftShiftWHelperSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (_, y) = b.uninterleave(); + (1u32 << y.leading_ones()) as u64 + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/lower_half_word.rs b/crates/jolt-instructions/src/tables/suffixes/lower_half_word.rs new file mode 100644 index 000000000..39e578cff --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/lower_half_word.rs @@ -0,0 +1,15 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum LowerHalfWordSuffix {} + +impl SparseDenseSuffix for LowerHalfWordSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let half_word_size = XLEN / 2; + if half_word_size == 64 { + u128::from(b) as u64 + } else { + (u128::from(b) % (1 << half_word_size)) as u64 + } + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/lower_word.rs b/crates/jolt-instructions/src/tables/suffixes/lower_word.rs new file mode 100644 index 000000000..6499430ec --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/lower_word.rs @@ -0,0 +1,10 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum LowerWordSuffix {} + +impl 
SparseDenseSuffix for LowerWordSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + (u128::from(b) % (1 << XLEN)) as u64 + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/lsb.rs b/crates/jolt-instructions/src/tables/suffixes/lsb.rs new file mode 100644 index 000000000..ed9669657 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/lsb.rs @@ -0,0 +1,14 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum LsbSuffix {} + +impl SparseDenseSuffix for LsbSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + if b.is_empty() { + 1 + } else { + (u128::from(b) & 1) as u64 + } + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/lt.rs b/crates/jolt-instructions/src/tables/suffixes/lt.rs new file mode 100644 index 000000000..e814ccb1e --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/lt.rs @@ -0,0 +1,11 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum LessThanSuffix {} + +impl SparseDenseSuffix for LessThanSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (x, y) = b.uninterleave(); + (u64::from(x) < u64::from(y)).into() + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/mod.rs b/crates/jolt-instructions/src/tables/suffixes/mod.rs new file mode 100644 index 000000000..160979ae5 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/mod.rs @@ -0,0 +1,236 @@ +//! Suffix polynomial evaluations for the sparse-dense decomposition. +//! +//! Each suffix computes a function over the "unbound" low-order bits of a +//! lookup index during the sumcheck protocol. Suffixes evaluate to `u64` +//! values (not field elements), making them cheap to compute and +//! field-independent. +//! +//! The decomposition works as: `table_mle(r) = Σ prefix_i(r_high) · suffix_i(b_low)`, +//! where `b_low` ranges over the Boolean hypercube. 
+ +use crate::lookup_bits::LookupBits; + +mod and; +mod change_divisor; +mod change_divisor_w; +mod div_by_zero; +mod eq; +mod gt; +mod left_is_zero; +mod left_shift; +mod left_shift_w; +mod left_shift_w_helper; +mod lower_half_word; +mod lower_word; +mod lsb; +mod lt; +mod notand; +mod one; +mod or; +mod overflow_bits_zero; +mod pow2; +mod pow2_w; +mod rev8w; +mod right_is_zero; +mod right_operand; +mod right_operand_w; +mod right_shift; +mod right_shift_helper; +mod right_shift_padding; +mod right_shift_w; +mod right_shift_w_helper; +mod sign_extension; +mod sign_extension_right_operand; +mod sign_extension_upper_half; +mod two_lsb; +mod upper_word; +mod xor; +mod xor_rot; +mod xor_rotw; + +use and::AndSuffix; +use change_divisor::ChangeDivisorSuffix; +use change_divisor_w::ChangeDivisorWSuffix; +use div_by_zero::DivByZeroSuffix; +use eq::EqSuffix; +use gt::GreaterThanSuffix; +use left_is_zero::LeftOperandIsZeroSuffix; +use left_shift::LeftShiftSuffix; +use left_shift_w::LeftShiftWSuffix; +use left_shift_w_helper::LeftShiftWHelperSuffix; +use lower_half_word::LowerHalfWordSuffix; +use lower_word::LowerWordSuffix; +use lsb::LsbSuffix; +use lt::LessThanSuffix; +use notand::NotAndSuffix; +use one::OneSuffix; +use or::OrSuffix; +use overflow_bits_zero::OverflowBitsZeroSuffix; +use pow2::Pow2Suffix; +use pow2_w::Pow2WSuffix; +use rev8w::Rev8WSuffix; +use right_is_zero::RightOperandIsZeroSuffix; +use right_operand::RightOperandSuffix; +use right_operand_w::RightOperandWSuffix; +use right_shift::RightShiftSuffix; +use right_shift_helper::RightShiftHelperSuffix; +use right_shift_padding::RightShiftPaddingSuffix; +use right_shift_w::RightShiftWSuffix; +use right_shift_w_helper::RightShiftWHelperSuffix; +use sign_extension::SignExtensionSuffix; +use sign_extension_right_operand::SignExtensionRightOperandSuffix; +use sign_extension_upper_half::SignExtensionUpperHalfSuffix; +use two_lsb::TwoLsbSuffix; +use upper_word::UpperWordSuffix; +use xor::XorSuffix; +use 
xor_rot::XorRotSuffix; +use xor_rotw::XorRotWSuffix; + +use jolt_field::Field; + +/// A suffix polynomial: evaluates on unbound Boolean variables during sumcheck. +/// +/// Suffixes return `u64` values (not field elements) to avoid unnecessary +/// field arithmetic when the result is a small integer. +pub trait SparseDenseSuffix: 'static + Sync { + /// Evaluate this suffix's MLE on bitvector `b`, where `b.len()` variables + /// are set to Boolean values. + fn suffix_mle(b: LookupBits) -> u64; +} + +/// Type alias for suffix evaluations promoted to field elements. +pub type SuffixEval = F; + +/// All suffix types used by Jolt's lookup tables. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum Suffixes { + One, + And, + NotAnd, + Xor, + Or, + RightOperand, + RightOperandW, + ChangeDivisor, + ChangeDivisorW, + UpperWord, + LowerWord, + LowerHalfWord, + LessThan, + GreaterThan, + Eq, + LeftOperandIsZero, + RightOperandIsZero, + Lsb, + DivByZero, + Pow2, + Pow2W, + Rev8W, + RightShiftPadding, + RightShift, + RightShiftHelper, + SignExtension, + LeftShift, + TwoLsb, + SignExtensionUpperHalf, + SignExtensionRightOperand, + RightShiftW, + RightShiftWHelper, + LeftShiftWHelper, + LeftShiftW, + OverflowBitsZero, + XorRot16, + XorRot24, + XorRot32, + XorRot63, + XorRotW16, + XorRotW12, + XorRotW8, + XorRotW7, +} + +/// Total number of suffix variants. +pub const NUM_SUFFIXES: usize = 43; + +impl Suffixes { + /// Returns `true` if this suffix's output is guaranteed to be in {0, 1}. + /// + /// This enables micro-optimizations in the sumcheck prover that avoid + /// multiplying by 1 (directly adding the unreduced field element instead). 
+
+    #[inline(always)]
+    pub fn is_01_valued(&self) -> bool {
+        matches!(
+            self,
+            Suffixes::One
+                | Suffixes::Eq
+                | Suffixes::LessThan
+                | Suffixes::GreaterThan
+                | Suffixes::LeftOperandIsZero
+                | Suffixes::RightOperandIsZero
+                | Suffixes::Lsb
+                | Suffixes::TwoLsb
+                | Suffixes::DivByZero
+                | Suffixes::OverflowBitsZero
+                | Suffixes::ChangeDivisor
+                | Suffixes::ChangeDivisorW
+        )
+    }
+
+    /// Evaluate this suffix's MLE on bitvector `b`.
+    pub fn suffix_mle<const XLEN: usize>(&self, b: LookupBits) -> u64 {
+        match self {
+            Suffixes::One => OneSuffix::suffix_mle(b),
+            Suffixes::And => AndSuffix::suffix_mle(b),
+            Suffixes::NotAnd => NotAndSuffix::suffix_mle(b),
+            Suffixes::Or => OrSuffix::suffix_mle(b),
+            Suffixes::Xor => XorSuffix::suffix_mle(b),
+            Suffixes::RightOperand => RightOperandSuffix::suffix_mle(b),
+            Suffixes::RightOperandW => RightOperandWSuffix::suffix_mle(b),
+            Suffixes::ChangeDivisor => ChangeDivisorSuffix::suffix_mle(b),
+            Suffixes::ChangeDivisorW => ChangeDivisorWSuffix::<XLEN>::suffix_mle(b),
+            Suffixes::UpperWord => UpperWordSuffix::<XLEN>::suffix_mle(b),
+            Suffixes::LowerWord => LowerWordSuffix::<XLEN>::suffix_mle(b),
+            Suffixes::LowerHalfWord => LowerHalfWordSuffix::<XLEN>::suffix_mle(b),
+            Suffixes::LessThan => LessThanSuffix::suffix_mle(b),
+            Suffixes::GreaterThan => GreaterThanSuffix::suffix_mle(b),
+            Suffixes::Eq => EqSuffix::suffix_mle(b),
+            Suffixes::LeftOperandIsZero => LeftOperandIsZeroSuffix::suffix_mle(b),
+            Suffixes::RightOperandIsZero => RightOperandIsZeroSuffix::suffix_mle(b),
+            Suffixes::Lsb => LsbSuffix::suffix_mle(b),
+            Suffixes::DivByZero => DivByZeroSuffix::suffix_mle(b),
+            Suffixes::Pow2 => Pow2Suffix::<XLEN>::suffix_mle(b),
+            Suffixes::Pow2W => Pow2WSuffix::<XLEN>::suffix_mle(b),
+            Suffixes::Rev8W => Rev8WSuffix::suffix_mle(b),
+            Suffixes::RightShiftPadding => RightShiftPaddingSuffix::<XLEN>::suffix_mle(b),
+            Suffixes::RightShift => RightShiftSuffix::suffix_mle(b),
+            Suffixes::RightShiftHelper => RightShiftHelperSuffix::suffix_mle(b),
+            Suffixes::SignExtension =>
SignExtensionSuffix::<XLEN>::suffix_mle(b),
+            Suffixes::LeftShift => LeftShiftSuffix::suffix_mle(b),
+            Suffixes::TwoLsb => TwoLsbSuffix::suffix_mle(b),
+            Suffixes::SignExtensionUpperHalf => SignExtensionUpperHalfSuffix::<XLEN>::suffix_mle(b),
+            Suffixes::SignExtensionRightOperand => {
+                SignExtensionRightOperandSuffix::<XLEN>::suffix_mle(b)
+            }
+            Suffixes::RightShiftW => RightShiftWSuffix::<XLEN>::suffix_mle(b),
+            Suffixes::RightShiftWHelper => RightShiftWHelperSuffix::<XLEN>::suffix_mle(b),
+            Suffixes::LeftShiftWHelper => LeftShiftWHelperSuffix::suffix_mle(b),
+            Suffixes::LeftShiftW => LeftShiftWSuffix::<XLEN>::suffix_mle(b),
+            Suffixes::OverflowBitsZero => OverflowBitsZeroSuffix::<XLEN>::suffix_mle(b),
+            Suffixes::XorRot16 => XorRotSuffix::<16>::suffix_mle(b),
+            Suffixes::XorRot24 => XorRotSuffix::<24>::suffix_mle(b),
+            Suffixes::XorRot32 => XorRotSuffix::<32>::suffix_mle(b),
+            Suffixes::XorRot63 => XorRotSuffix::<63>::suffix_mle(b),
+            Suffixes::XorRotW7 => XorRotWSuffix::<7>::suffix_mle(b),
+            Suffixes::XorRotW8 => XorRotWSuffix::<8>::suffix_mle(b),
+            Suffixes::XorRotW12 => XorRotWSuffix::<12>::suffix_mle(b),
+            Suffixes::XorRotW16 => XorRotWSuffix::<16>::suffix_mle(b),
+        }
+    }
+
+    /// Evaluate and promote to a field element.
+    #[inline]
+    pub fn evaluate<const XLEN: usize, F: Field>(&self, b: LookupBits) -> SuffixEval<F> {
+        F::from_u64(self.suffix_mle::<XLEN>(b))
+    }
+}
diff --git a/crates/jolt-instructions/src/tables/suffixes/notand.rs b/crates/jolt-instructions/src/tables/suffixes/notand.rs
new file mode 100644
index 000000000..60bc007d0
--- /dev/null
+++ b/crates/jolt-instructions/src/tables/suffixes/notand.rs
@@ -0,0 +1,11 @@
+use super::SparseDenseSuffix;
+use crate::lookup_bits::LookupBits;
+
+pub enum NotAndSuffix {}
+
+impl SparseDenseSuffix for NotAndSuffix {
+    fn suffix_mle(b: LookupBits) -> u64 {
+        let (x, y) = b.uninterleave();
+        u64::from(x) & !u64::from(y)
+    }
+}
diff --git a/crates/jolt-instructions/src/tables/suffixes/one.rs b/crates/jolt-instructions/src/tables/suffixes/one.rs
new file mode 100644
index 000000000..e166dc2f5
--- /dev/null
+++ b/crates/jolt-instructions/src/tables/suffixes/one.rs
@@ -0,0 +1,10 @@
+use super::SparseDenseSuffix;
+use crate::lookup_bits::LookupBits;
+
+pub enum OneSuffix {}
+
+impl SparseDenseSuffix for OneSuffix {
+    fn suffix_mle(_: LookupBits) -> u64 {
+        1
+    }
+}
diff --git a/crates/jolt-instructions/src/tables/suffixes/or.rs b/crates/jolt-instructions/src/tables/suffixes/or.rs
new file mode 100644
index 000000000..31be3084a
--- /dev/null
+++ b/crates/jolt-instructions/src/tables/suffixes/or.rs
@@ -0,0 +1,11 @@
+use super::SparseDenseSuffix;
+use crate::lookup_bits::LookupBits;
+
+pub enum OrSuffix {}
+
+impl SparseDenseSuffix for OrSuffix {
+    fn suffix_mle(b: LookupBits) -> u64 {
+        let (x, y) = b.uninterleave();
+        u64::from(x) | u64::from(y)
+    }
+}
diff --git a/crates/jolt-instructions/src/tables/suffixes/overflow_bits_zero.rs b/crates/jolt-instructions/src/tables/suffixes/overflow_bits_zero.rs
new file mode 100644
index 000000000..bbbed9bb5
--- /dev/null
+++ b/crates/jolt-instructions/src/tables/suffixes/overflow_bits_zero.rs
@@ -0,0 +1,12 @@
+use super::SparseDenseSuffix;
+use crate::lookup_bits::LookupBits;
+
+/// 1 if the upper 128-XLEN bits are all zero (no overflow),
0 otherwise.
+pub enum OverflowBitsZeroSuffix<const XLEN: usize> {}
+
+impl<const XLEN: usize> SparseDenseSuffix for OverflowBitsZeroSuffix<XLEN> {
+    fn suffix_mle(b: LookupBits) -> u64 {
+        let upper_bits = u128::from(b) >> XLEN;
+        (upper_bits == 0).into()
+    }
+}
diff --git a/crates/jolt-instructions/src/tables/suffixes/pow2.rs b/crates/jolt-instructions/src/tables/suffixes/pow2.rs
new file mode 100644
index 000000000..09b7731e9
--- /dev/null
+++ b/crates/jolt-instructions/src/tables/suffixes/pow2.rs
@@ -0,0 +1,17 @@
+use super::SparseDenseSuffix;
+use crate::lookup_bits::LookupBits;
+
+/// 2^shift where shift is the lower log2(XLEN) bits of the second operand.
+pub enum Pow2Suffix<const XLEN: usize> {}
+
+impl<const XLEN: usize> SparseDenseSuffix for Pow2Suffix<XLEN> {
+    fn suffix_mle(b: LookupBits) -> u64 {
+        if b.is_empty() {
+            1
+        } else {
+            let log_xlen = XLEN.trailing_zeros() as usize;
+            let (_, shift) = b.split(log_xlen);
+            1 << u64::from(shift)
+        }
+    }
+}
diff --git a/crates/jolt-instructions/src/tables/suffixes/pow2_w.rs b/crates/jolt-instructions/src/tables/suffixes/pow2_w.rs
new file mode 100644
index 000000000..b5c1272a6
--- /dev/null
+++ b/crates/jolt-instructions/src/tables/suffixes/pow2_w.rs
@@ -0,0 +1,16 @@
+use super::SparseDenseSuffix;
+use crate::lookup_bits::LookupBits;
+
+/// 2^shift where shift is the lower 5 bits (modulo 32).
+pub enum Pow2WSuffix {} + +impl SparseDenseSuffix for Pow2WSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + if b.is_empty() { + 1 + } else { + let (_, shift) = b.split(5); + 1 << u64::from(shift) + } + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/rev8w.rs b/crates/jolt-instructions/src/tables/suffixes/rev8w.rs new file mode 100644 index 000000000..b935b93b7 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/rev8w.rs @@ -0,0 +1,11 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum Rev8WSuffix {} + +impl SparseDenseSuffix for Rev8WSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let val = u128::from(b) as u32; + val.swap_bytes() as u64 + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/right_is_zero.rs b/crates/jolt-instructions/src/tables/suffixes/right_is_zero.rs new file mode 100644 index 000000000..3064bb18e --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/right_is_zero.rs @@ -0,0 +1,11 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum RightOperandIsZeroSuffix {} + +impl SparseDenseSuffix for RightOperandIsZeroSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (_, y) = b.uninterleave(); + (u64::from(y) == 0).into() + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/right_operand.rs b/crates/jolt-instructions/src/tables/suffixes/right_operand.rs new file mode 100644 index 000000000..cb196f33f --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/right_operand.rs @@ -0,0 +1,11 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum RightOperandSuffix {} + +impl SparseDenseSuffix for RightOperandSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (_, y) = b.uninterleave(); + u64::from(y) + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/right_operand_w.rs b/crates/jolt-instructions/src/tables/suffixes/right_operand_w.rs new file mode 100644 index 
000000000..c876ed06e --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/right_operand_w.rs @@ -0,0 +1,11 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum RightOperandWSuffix {} + +impl SparseDenseSuffix for RightOperandWSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (_, y) = b.uninterleave(); + u64::from(y) as u32 as u64 + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/right_shift.rs b/crates/jolt-instructions/src/tables/suffixes/right_shift.rs new file mode 100644 index 000000000..107867728 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/right_shift.rs @@ -0,0 +1,12 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +/// Right-aligns the masked bits of the left operand. +pub enum RightShiftSuffix {} + +impl SparseDenseSuffix for RightShiftSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (x, y) = b.uninterleave(); + u64::from(x).unbounded_shr(y.trailing_zeros()) + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/right_shift_helper.rs b/crates/jolt-instructions/src/tables/suffixes/right_shift_helper.rs new file mode 100644 index 000000000..3619f645a --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/right_shift_helper.rs @@ -0,0 +1,12 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +/// 2^(y.leading_ones()) where y is the right operand. 
+pub enum RightShiftHelperSuffix {} + +impl SparseDenseSuffix for RightShiftHelperSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (_, y) = b.uninterleave(); + 1 << y.leading_ones() + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/right_shift_padding.rs b/crates/jolt-instructions/src/tables/suffixes/right_shift_padding.rs new file mode 100644 index 000000000..a7ae23d32 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/right_shift_padding.rs @@ -0,0 +1,24 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +/// Bitmask helper for arithmetic right shift padding. +/// +/// Together with `RightShiftPaddingPrefix`, computes: +/// - 2^XLEN if shift == 0 +/// - 2^shift otherwise +/// +/// This gets subtracted from 2^XLEN to obtain the desired padding bitmask. +pub enum RightShiftPaddingSuffix {} + +impl SparseDenseSuffix for RightShiftPaddingSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + if b.is_empty() { + return 1; + } + let log_xlen = XLEN.trailing_zeros() as usize; + let (_, shift) = b.split(log_xlen); + let shift = u64::from(shift); + // Subtract 1 from exponent to avoid overflow; prefix compensates with factor of 2 + 1 << (XLEN - 1 - shift as usize) + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/right_shift_w.rs b/crates/jolt-instructions/src/tables/suffixes/right_shift_w.rs new file mode 100644 index 000000000..526d6626b --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/right_shift_w.rs @@ -0,0 +1,12 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +/// Right-shift W variant: processes lower 32 bits. 
+pub enum RightShiftWSuffix {} + +impl SparseDenseSuffix for RightShiftWSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (x, y) = b.uninterleave(); + (u64::from(x) as u32).unbounded_shr(y.trailing_zeros().min(XLEN as u32 / 2)) as u64 + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/right_shift_w_helper.rs b/crates/jolt-instructions/src/tables/suffixes/right_shift_w_helper.rs new file mode 100644 index 000000000..d04f43831 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/right_shift_w_helper.rs @@ -0,0 +1,13 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +/// 2^(y.leading_ones()) for W variant, truncated to XLEN/2 bits. +pub enum RightShiftWHelperSuffix {} + +impl SparseDenseSuffix for RightShiftWHelperSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (_, y) = b.uninterleave(); + let y = LookupBits::new(u128::from(y), y.len().min(XLEN / 2)); + 1 << y.leading_ones() + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/sign_extension.rs b/crates/jolt-instructions/src/tables/suffixes/sign_extension.rs new file mode 100644 index 000000000..883f2f0c6 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/sign_extension.rs @@ -0,0 +1,13 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum SignExtensionSuffix {} + +impl SparseDenseSuffix for SignExtensionSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (_, y) = b.uninterleave(); + let padding_len = std::cmp::min(u64::from(y).trailing_zeros() as usize, y.len()); + // 0b11...100...0 + ((1u128 << XLEN) - (1u128 << (XLEN - padding_len))) as u64 + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/sign_extension_right_operand.rs b/crates/jolt-instructions/src/tables/suffixes/sign_extension_right_operand.rs new file mode 100644 index 000000000..2ddc95d53 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/sign_extension_right_operand.rs @@ -0,0 +1,21 @@ +use 
super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum SignExtensionRightOperandSuffix {} + +impl SparseDenseSuffix for SignExtensionRightOperandSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + if b.len() >= XLEN { + let bits = u128::from(b); + let sign_bit_position = XLEN - 2; + let sign_bit = (bits >> sign_bit_position) & 1; + if sign_bit == 1 { + ((1u128 << XLEN) - (1u128 << (XLEN / 2))) as u64 + } else { + 0 + } + } else { + 1 + } + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/sign_extension_upper_half.rs b/crates/jolt-instructions/src/tables/suffixes/sign_extension_upper_half.rs new file mode 100644 index 000000000..c19ed103b --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/sign_extension_upper_half.rs @@ -0,0 +1,22 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum SignExtensionUpperHalfSuffix {} + +impl SparseDenseSuffix for SignExtensionUpperHalfSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let half_word_size = XLEN / 2; + + if b.len() >= half_word_size { + let bits = u128::from(b); + let sign_bit = (bits >> (half_word_size - 1)) & 1; + if sign_bit == 1 { + ((1u64 << half_word_size) - 1) << half_word_size + } else { + 0 + } + } else { + 1 + } + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/two_lsb.rs b/crates/jolt-instructions/src/tables/suffixes/two_lsb.rs new file mode 100644 index 000000000..729c8589a --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/two_lsb.rs @@ -0,0 +1,11 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub struct TwoLsbSuffix; + +impl SparseDenseSuffix for TwoLsbSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + // 1 if the two least significant bits are 0, else 0 + (b.is_empty() || u128::from(b).trailing_zeros() >= 2).into() + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/upper_word.rs b/crates/jolt-instructions/src/tables/suffixes/upper_word.rs new file mode 100644 
index 000000000..4cef13781 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/upper_word.rs @@ -0,0 +1,10 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum UpperWordSuffix {} + +impl SparseDenseSuffix for UpperWordSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + (u128::from(b) >> XLEN) as u64 + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/xor.rs b/crates/jolt-instructions/src/tables/suffixes/xor.rs new file mode 100644 index 000000000..9901544d5 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/xor.rs @@ -0,0 +1,11 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +pub enum XorSuffix {} + +impl SparseDenseSuffix for XorSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (x, y) = b.uninterleave(); + u64::from(x) ^ u64::from(y) + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/xor_rot.rs b/crates/jolt-instructions/src/tables/suffixes/xor_rot.rs new file mode 100644 index 000000000..d23f04ba6 --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/xor_rot.rs @@ -0,0 +1,13 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +/// XOR operands then rotate right by a constant. +pub enum XorRotSuffix {} + +impl SparseDenseSuffix for XorRotSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (x, y) = b.uninterleave(); + let xor_result = u64::from(x) ^ u64::from(y); + xor_result.rotate_right(ROTATION) + } +} diff --git a/crates/jolt-instructions/src/tables/suffixes/xor_rotw.rs b/crates/jolt-instructions/src/tables/suffixes/xor_rotw.rs new file mode 100644 index 000000000..09112654b --- /dev/null +++ b/crates/jolt-instructions/src/tables/suffixes/xor_rotw.rs @@ -0,0 +1,13 @@ +use super::SparseDenseSuffix; +use crate::lookup_bits::LookupBits; + +/// XOR lower 32 bits of operands then rotate right by a constant. 
+pub enum XorRotWSuffix {} + +impl SparseDenseSuffix for XorRotWSuffix { + fn suffix_mle(b: LookupBits) -> u64 { + let (x, y) = b.uninterleave(); + let xor_result = (u64::from(x) as u32) ^ (u64::from(y) as u32); + xor_result.rotate_right(ROTATION) as u64 + } +} diff --git a/crates/jolt-instructions/src/tables/test_utils.rs b/crates/jolt-instructions/src/tables/test_utils.rs new file mode 100644 index 000000000..8982a4273 --- /dev/null +++ b/crates/jolt-instructions/src/tables/test_utils.rs @@ -0,0 +1,162 @@ +use jolt_field::Field; +use rand::prelude::*; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::interleave::interleave_bits; +use crate::lookup_bits::LookupBits; +use crate::tables::prefixes::{PrefixCheckpoint, Prefixes, ALL_PREFIXES, NUM_PREFIXES}; +use crate::tables::suffixes::SuffixEval; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; + +/// Convert an integer to a vector of field elements representing its binary decomposition. +/// +/// Returns MSB-first: index 0 is the highest bit. +pub fn index_to_field_bitvector>(value: u128, bits: usize) -> Vec { + if bits != 128 { + assert!(value < 1u128 << bits); + } + let mut bitvector: Vec = Vec::with_capacity(bits); + for i in (0..bits).rev() { + if (value >> i) & 1 == 1 { + bitvector.push(F::one()); + } else { + bitvector.push(F::zero()); + } + } + bitvector +} + +/// Generate a lookup index where the right operand is a bitmask of the form `111...000`. +/// +/// Used by shift/rotate tables whose inputs must have this structure. +pub fn gen_bitmask_lookup_index(rng: &mut StdRng) -> u128 { + let x = rng.next_u64(); + let zeros = rng.gen_range(0..=XLEN); + let y = (!0u64).wrapping_shl(zeros as u32); + interleave_bits(x, y) +} + +/// Verify that `evaluate_mle` matches `materialize_entry` at 1000 random points. +/// +/// Uses the production XLEN (64) challenge points. 
+pub fn mle_random_test() +where + F: Field + FieldOps + ChallengeOps, + T: LookupTable + Default, +{ + let mut rng = StdRng::seed_from_u64(12345); + for _ in 0..1000 { + let index: u128 = rng.gen(); + assert_eq!( + F::from_u64(T::default().materialize_entry(index)), + T::default().evaluate_mle::(&index_to_field_bitvector(index, XLEN * 2)), + "MLE did not match materialized table at index {index}", + ); + } +} + +/// Verify that `evaluate_mle` matches `materialize_entry` on the full 2^16 hypercube (XLEN=8). +pub fn mle_full_hypercube_test() +where + F: Field + FieldOps + ChallengeOps, + T: LookupTable<8> + Default, +{ + let materialized = T::default().materialize(); + for (i, entry) in materialized.iter().enumerate() { + assert_eq!( + F::from_u64(*entry), + T::default().evaluate_mle::(&index_to_field_bitvector(i as u128, 16)), + "MLE did not match materialized table at index {i}", + ); + } +} + +/// Verify the prefix/suffix decomposition matches `evaluate_mle` across all sumcheck rounds. +/// +/// For 300 random lookup indices, walks through every sumcheck round and checks that +/// `combine(prefix_evals, suffix_evals) == evaluate_mle(partially_bound_point)`. +/// This is the most comprehensive correctness test for the sparse-dense decomposition. 
+pub fn prefix_suffix_test() +where + F: Field + FieldOps + ChallengeOps, + T: PrefixSuffixDecomposition, +{ + const ROUNDS_PER_PHASE: usize = 16; + let total_phases: usize = XLEN * 2 / ROUNDS_PER_PHASE; + let mut rng = StdRng::seed_from_u64(12345); + + for _ in 0..300 { + let mut prefix_checkpoints: Vec> = vec![None.into(); NUM_PREFIXES]; + let lookup_index = T::random_lookup_index(&mut rng); + let mut j = 0; + let mut r: Vec = vec![]; + for phase in 0..total_phases { + let suffix_len = (total_phases - 1 - phase) * ROUNDS_PER_PHASE; + let (mut prefix_bits, suffix_bits) = + LookupBits::new(lookup_index, XLEN * 2 - phase * ROUNDS_PER_PHASE) + .split(suffix_len); + + let suffix_evals: Vec<_> = T::default() + .suffixes() + .iter() + .map(|suffix| SuffixEval::from(F::from_u64(suffix.suffix_mle::(suffix_bits)))) + .collect(); + + for _ in 0..ROUNDS_PER_PHASE { + let mut eval_point = r.clone(); + let c = if rng.next_u64().is_multiple_of(2) { + 0 + } else { + 2 + }; + eval_point.push(F::from_u32(c)); + let _ = prefix_bits.pop_msb(); + + eval_point.extend( + index_to_field_bitvector::(prefix_bits.into(), prefix_bits.len()).iter(), + ); + eval_point.extend( + index_to_field_bitvector::(suffix_bits.into(), suffix_bits.len()).iter(), + ); + + let mle_eval: F = T::default().evaluate_mle(&eval_point); + + let r_x = if j % 2 == 1 { + Some(*r.last().unwrap()) + } else { + None + }; + + let prefix_evals: Vec<_> = ALL_PREFIXES + .iter() + .map(|prefix| { + prefix.prefix_mle::(&prefix_checkpoints, r_x, c, prefix_bits, j) + }) + .collect(); + + let combined = T::default().combine(&prefix_evals, &suffix_evals); + assert_eq!( + combined, mle_eval, + "prefix/suffix decomposition mismatch at round {j}, \ + lookup_index={lookup_index}, prefix_bits={prefix_bits}, \ + suffix_bits={suffix_bits}" + ); + + r.push(F::from_u64(rng.next_u64())); + + if r.len().is_multiple_of(2) { + Prefixes::update_checkpoints::( + &mut prefix_checkpoints, + r[r.len() - 2], + r[r.len() - 1], + j, + suffix_len, + 
); + } + + j += 1; + } + } + } +} diff --git a/crates/jolt-instructions/src/tables/unsigned_greater_than_equal.rs b/crates/jolt-instructions/src/tables/unsigned_greater_than_equal.rs new file mode 100644 index 000000000..70901aa9b --- /dev/null +++ b/crates/jolt-instructions/src/tables/unsigned_greater_than_equal.rs @@ -0,0 +1,47 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::unsigned_less_than::UnsignedLessThanTable; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct UnsignedGreaterThanEqualTable; + +impl LookupTable for UnsignedGreaterThanEqualTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x, y) = uninterleave_bits(index); + match XLEN { + #[cfg(test)] + 8 => (x >= y).into(), + 32 => (x >= y).into(), + 64 => (x >= y).into(), + _ => panic!("{XLEN}-bit word size is unsupported"), + } + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + F::one() - UnsignedLessThanTable::.evaluate_mle::(r) + } +} + +impl PrefixSuffixDecomposition for UnsignedGreaterThanEqualTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::LessThan] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [one, less_than] = suffixes.try_into().unwrap(); + // 1 - LTU(x, y) + one - prefixes[Prefixes::LessThan] * one - prefixes[Prefixes::Eq] * less_than + } +} diff --git a/crates/jolt-instructions/src/tables/unsigned_less_than.rs b/crates/jolt-instructions/src/tables/unsigned_less_than.rs new file mode 100644 index 000000000..1e89a17e6 --- /dev/null +++ 
b/crates/jolt-instructions/src/tables/unsigned_less_than.rs @@ -0,0 +1,50 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct UnsignedLessThanTable; + +impl LookupTable for UnsignedLessThanTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x, y) = uninterleave_bits(index); + (x < y).into() + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + + // \sum_i (1 - x_i) * y_i * \prod_{j < i} ((1 - x_j) * (1 - y_j) + x_j * y_j) + let mut result = F::zero(); + let mut eq_term = F::one(); + for i in 0..XLEN { + let x_i = r[2 * i]; + let y_i = r[2 * i + 1]; + result += (F::one() - x_i) * y_i * eq_term; + eq_term *= x_i * y_i + (F::one() - x_i) * (F::one() - y_i); + } + result + } +} + +impl PrefixSuffixDecomposition for UnsignedLessThanTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::LessThan] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [one, less_than] = suffixes.try_into().unwrap(); + prefixes[Prefixes::LessThan] * one + prefixes[Prefixes::Eq] * less_than + } +} diff --git a/crates/jolt-instructions/src/tables/unsigned_less_than_equal.rs b/crates/jolt-instructions/src/tables/unsigned_less_than_equal.rs new file mode 100644 index 000000000..c7069fc9c --- /dev/null +++ b/crates/jolt-instructions/src/tables/unsigned_less_than_equal.rs @@ -0,0 +1,53 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use 
crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct UnsignedLessThanEqualTable; + +impl LookupTable for UnsignedLessThanEqualTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x, y) = uninterleave_bits(index); + (x <= y).into() + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + + let mut lt = F::zero(); + let mut eq = F::one(); + for i in 0..XLEN { + let x_i = r[2 * i]; + let y_i = r[2 * i + 1]; + lt += (F::one() - x_i) * y_i * eq; + eq *= x_i * y_i + (F::one() - x_i) * (F::one() - y_i); + } + + lt + eq + } +} + +impl PrefixSuffixDecomposition for UnsignedLessThanEqualTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::LessThan, Suffixes::Eq] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [one, less_than, eq] = suffixes.try_into().unwrap(); + // LT(x, y) + EQ(x, y) + prefixes[Prefixes::LessThan] * one + + prefixes[Prefixes::Eq] * less_than + + prefixes[Prefixes::Eq] * eq + } +} diff --git a/crates/jolt-instructions/src/tables/upper_word.rs b/crates/jolt-instructions/src/tables/upper_word.rs new file mode 100644 index 000000000..ffb653b98 --- /dev/null +++ b/crates/jolt-instructions/src/tables/upper_word.rs @@ -0,0 +1,41 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] 
+pub struct UpperWordTable; + +impl LookupTable for UpperWordTable { + fn materialize_entry(&self, index: u128) -> u64 { + (index >> XLEN) as u64 + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let mut result = F::zero(); + for (i, r_i) in r[..XLEN].iter().enumerate() { + result += F::from_u64(1 << (XLEN - 1 - i)) * *r_i; + } + result + } +} + +impl PrefixSuffixDecomposition for UpperWordTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::UpperWord] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + let [one, upper_word] = suffixes.try_into().unwrap(); + prefixes[Prefixes::UpperWord] * one + upper_word + } +} diff --git a/crates/jolt-instructions/src/tables/valid_div0.rs b/crates/jolt-instructions/src/tables/valid_div0.rs new file mode 100644 index 000000000..d23c69c06 --- /dev/null +++ b/crates/jolt-instructions/src/tables/valid_div0.rs @@ -0,0 +1,65 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +/// (divisor, quotient) +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct ValidDiv0Table; + +impl LookupTable for ValidDiv0Table { + fn materialize_entry(&self, index: u128) -> u64 { + let (divisor, quotient) = uninterleave_bits(index); + if divisor == 0 { + match XLEN { + #[cfg(test)] + 8 => (quotient == u8::MAX as u64).into(), + 32 => (quotient == u32::MAX as u64).into(), + 64 => (quotient == u64::MAX).into(), + _ => panic!("{XLEN}-bit word size is unsupported"), + } + } else { + 1 + } + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + let mut 
/// (remainder, divisor)
///
/// Outputs 1 iff `(remainder, divisor)` is a valid signed-remainder pair:
/// either operand being zero is accepted, and otherwise the remainder must be
/// strictly smaller in magnitude than the divisor and share its sign.
#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)]
pub struct ValidSignedRemainderTable;

impl LookupTable for ValidSignedRemainderTable {
    // Ground-truth entry: interpret the uninterleaved halves of `index` as
    // signed XLEN-bit integers and apply the validity rule above.
    fn materialize_entry(&self, index: u128) -> u64 {
        let (x, y) = uninterleave_bits(index);
        match XLEN {
            // 8-bit arm exists only so tests can exhaustively enumerate the table.
            #[cfg(test)]
            8 => {
                let (remainder, divisor) = (x as u8 as i8, y as u8 as i8);
                if remainder == 0 || divisor == 0 {
                    1
                } else {
                    // Arithmetic shift by XLEN-1: 0 for non-negative, -1 for
                    // negative — a cheap whole-word sign extraction.
                    let remainder_sign = remainder >> (XLEN - 1);
                    let divisor_sign = divisor >> (XLEN - 1);
                    (remainder.unsigned_abs() < divisor.unsigned_abs()
                        && remainder_sign == divisor_sign)
                        .into()
                }
            }
            32 => {
                let (remainder, divisor) = (x as i32, y as i32);
                if remainder == 0 || divisor == 0 {
                    1
                } else {
                    let remainder_sign = remainder >> (XLEN - 1);
                    let divisor_sign = divisor >> (XLEN - 1);
                    (remainder.unsigned_abs() < divisor.unsigned_abs()
                        && remainder_sign == divisor_sign)
                        .into()
                }
            }
            64 => {
                let (remainder, divisor) = (x as i64, y as i64);
                if remainder == 0 || divisor == 0 {
                    1
                } else {
                    let remainder_sign = remainder >> (XLEN - 1);
                    let divisor_sign = divisor >> (XLEN - 1);
                    (remainder.unsigned_abs() < divisor.unsigned_abs()
                        && remainder_sign == divisor_sign)
                        .into()
                }
            }
            _ => panic!("{XLEN}-bit word size is unsupported"),
        }
    }

    // MLE over interleaved challenge bits: r[2i] are remainder bits, r[2i+1]
    // divisor bits, MSB first — so r[0]/r[1] are the two sign bits.
    fn evaluate_mle(&self, r: &[C]) -> F
    where
        C: ChallengeOps,
        F: Field + FieldOps,
    {
        let x_sign = r[0];
        let y_sign = r[1];

        // Running indicators, seeded from the sign bits; each loop iteration
        // extends them by one magnitude bit. Statement ORDER below matters:
        // the lt/gt accumulators must be updated with the *previous* equality
        // products before those products absorb the current bit.
        let mut remainder_is_zero = F::one() - r[0];
        let mut divisor_is_zero = F::one() - r[1];
        let mut positive_remainder_equals_divisor = (F::one() - x_sign) * (F::one() - y_sign);
        let mut positive_remainder_less_than_divisor = (F::one() - x_sign) * (F::one() - y_sign);
        let mut negative_divisor_equals_remainder = x_sign * y_sign;
        let mut negative_divisor_greater_than_remainder = x_sign * y_sign;

        for i in 1..XLEN {
            let x_i = r[2 * i];
            let y_i = r[2 * i + 1];
            if i == 1 {
                // First magnitude bit: initialize the strict comparison terms.
                positive_remainder_less_than_divisor *= (F::one() - x_i) * y_i;
                negative_divisor_greater_than_remainder *= x_i * (F::one() - y_i);
            } else {
                // Standard MSB-first comparison recurrence: "already decided"
                // OR "equal so far and this bit decides it".
                positive_remainder_less_than_divisor +=
                    positive_remainder_equals_divisor * (F::one() - x_i) * y_i;
                negative_divisor_greater_than_remainder +=
                    negative_divisor_equals_remainder * x_i * (F::one() - y_i);
            }
            // Bit-equality factor: x_i*y_i + (1-x_i)(1-y_i).
            positive_remainder_equals_divisor *= x_i * y_i + (F::one() - x_i) * (F::one() - y_i);
            negative_divisor_equals_remainder *= x_i * y_i + (F::one() - x_i) * (F::one() - y_i);
            remainder_is_zero *= F::one() - x_i;
            divisor_is_zero *= F::one() - y_i;
        }

        // Valid iff: (both positive, |rem| < |div|) or (both negative,
        // div > rem) or (rem == 0 with negative divisor — the positive-divisor
        // zero-remainder case is already captured by the lt term) or div == 0.
        positive_remainder_less_than_divisor
            + negative_divisor_greater_than_remainder
            + y_sign * remainder_is_zero
            + divisor_is_zero
    }
}

impl PrefixSuffixDecomposition for ValidSignedRemainderTable {
    fn suffixes(&self) -> Vec {
        vec![
            Suffixes::One,
            Suffixes::LessThan,
            Suffixes::GreaterThan,
            Suffixes::LeftOperandIsZero,
            Suffixes::RightOperandIsZero,
        ]
    }

    // Mirrors the case split of evaluate_mle across the prefix/suffix cut;
    // the prefix evals carry the sign/equality state accumulated so far.
    fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F {
        debug_assert_eq!(self.suffixes().len(), suffixes.len());
        let [one, less_than, greater_than, left_operand_is_zero, right_operand_is_zero] =
            suffixes.try_into().unwrap();
        prefixes[Prefixes::RightOperandIsZero] * right_operand_is_zero
            + prefixes[Prefixes::PositiveRemainderEqualsDivisor] * less_than
            + prefixes[Prefixes::PositiveRemainderLessThanDivisor] * one
            + prefixes[Prefixes::NegativeDivisorZeroRemainder] * left_operand_is_zero
            + prefixes[Prefixes::NegativeDivisorEqualsRemainder] * greater_than
            + prefixes[Prefixes::NegativeDivisorGreaterThanRemainder] * one
    }
}
remainder < divisor).into() + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + let mut divisor_is_zero = F::one(); + let mut lt = F::zero(); + let mut eq = F::one(); + + for i in 0..XLEN { + let x_i = r[2 * i]; + let y_i = r[2 * i + 1]; + divisor_is_zero *= F::one() - y_i; + lt += (F::one() - x_i) * y_i * eq; + eq *= x_i * y_i + (F::one() - x_i) * (F::one() - y_i); + } + + lt + divisor_is_zero + } +} + +impl PrefixSuffixDecomposition for ValidUnsignedRemainderTable { + fn suffixes(&self) -> Vec { + vec![ + Suffixes::One, + Suffixes::LessThan, + Suffixes::RightOperandIsZero, + ] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [one, less_than, right_operand_is_zero] = suffixes.try_into().unwrap(); + prefixes[Prefixes::RightOperandIsZero] * right_operand_is_zero + + prefixes[Prefixes::LessThan] * one + + prefixes[Prefixes::Eq] * less_than + } +} diff --git a/crates/jolt-instructions/src/tables/virtual_change_divisor.rs b/crates/jolt-instructions/src/tables/virtual_change_divisor.rs new file mode 100644 index 000000000..429bc2adb --- /dev/null +++ b/crates/jolt-instructions/src/tables/virtual_change_divisor.rs @@ -0,0 +1,98 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct VirtualChangeDivisorTable; + +impl LookupTable for VirtualChangeDivisorTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (dividend, divisor) = uninterleave_bits(index); + + match XLEN { + #[cfg(test)] + 8 => { + let dividend = dividend as i8; + let divisor = divisor 
as i8; + if dividend == i8::MIN && divisor == -1 { + 1 + } else { + divisor as u8 as u64 + } + } + 32 => { + let dividend = dividend as i32; + let divisor = divisor as i32; + if dividend == i32::MIN && divisor == -1 { + 1 + } else { + divisor as u32 as u64 + } + } + 64 => { + let dividend = dividend as i64; + let divisor = divisor as i64; + if dividend == i64::MIN && divisor == -1 { + 1 + } else { + divisor as u64 + } + } + _ => panic!("{XLEN}-bit word size is unsupported"), + } + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + + let mut divisor_value = F::zero(); + for i in 0..XLEN { + let bit_value = r[2 * i + 1]; + let shift = XLEN - 1 - i; + divisor_value += F::from_u128(1u128 << shift) * bit_value; + } + + let mut x_product = r[0].into(); + for i in 1..XLEN { + x_product *= F::one() - r[2 * i]; + } + + let mut y_product = F::one(); + for i in 0..XLEN { + y_product = y_product * r[2 * i + 1]; + } + + let adjustment = F::from_u64(2) - F::from_u128(1u128 << XLEN); + + divisor_value + x_product * y_product * adjustment + } +} + +impl PrefixSuffixDecomposition for VirtualChangeDivisorTable { + fn suffixes(&self) -> Vec { + vec![ + Suffixes::One, + Suffixes::RightOperand, + Suffixes::ChangeDivisor, + ] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [one, right_operand, change_divisor] = suffixes.try_into().unwrap(); + + prefixes[Prefixes::RightOperand] * one + + right_operand + + prefixes[Prefixes::ChangeDivisor] * change_divisor + } +} diff --git a/crates/jolt-instructions/src/tables/virtual_change_divisor_w.rs b/crates/jolt-instructions/src/tables/virtual_change_divisor_w.rs new file mode 100644 index 000000000..655f0cf1b --- /dev/null +++ b/crates/jolt-instructions/src/tables/virtual_change_divisor_w.rs @@ -0,0 +1,92 @@ +use jolt_field::Field; +use serde::{Deserialize, 
Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct VirtualChangeDivisorWTable; + +impl LookupTable for VirtualChangeDivisorWTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (dividend, divisor) = uninterleave_bits(index); + match XLEN { + #[cfg(test)] + 8 => { + let dividend = ((dividend & 0xF) as i8) << 4 >> 4; + let divisor = ((divisor & 0xF) as i8) << 4 >> 4; + if dividend == -8 && divisor == -1 { + 1 + } else { + divisor as u8 as u64 + } + } + 64 => { + let dividend = dividend as u32 as i32; + let divisor = divisor as u32 as i32; + if dividend == i32::MIN && divisor == -1 { + 1 + } else { + divisor as i64 as u64 + } + } + _ => panic!("Unsupported {XLEN} word size"), + } + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + + let sign_bit = r[XLEN + 1]; + + let mut divisor_value = F::zero(); + for i in XLEN / 2..XLEN { + let bit_value = r[2 * i + 1]; + let shift = XLEN - 1 - i; + divisor_value += F::from_u64(1u64 << shift) * bit_value; + } + + let mut x_product = r[XLEN].into(); + for i in XLEN / 2 + 1..XLEN { + x_product *= F::one() - r[2 * i]; + } + + let mut y_product = F::one(); + for i in XLEN / 2..XLEN { + y_product = y_product * r[2 * i + 1]; + } + + let sign_extension = F::from_u128((1u128 << XLEN) - (1u128 << (XLEN / 2))) * sign_bit; + let adjustment = F::from_u64(2) - F::from_u128(1u128 << XLEN); + + divisor_value + adjustment * x_product * y_product + sign_extension + } +} + +impl PrefixSuffixDecomposition for VirtualChangeDivisorWTable { + fn suffixes(&self) -> Vec { + vec![ + Suffixes::One, + Suffixes::RightOperandW, + 
/// Byte-reverse each 32-bit half of `v` independently (the REV8.W semantics):
/// the low word and the high word are each byte-swapped in place.
#[inline]
pub(crate) fn rev8w(v: u64) -> u64 {
    let lower = u64::from((v as u32).swap_bytes());
    let upper = u64::from(((v >> 32) as u32).swap_bytes());
    (upper << 32) | lower
}
PrefixSuffixDecomposition for VirtualRev8WTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::Rev8W] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + let [one, rev8w] = suffixes.try_into().unwrap(); + prefixes[Prefixes::Rev8W] * one + rev8w + } + + // Rev8WPrefix returns zero when suffix >= 64 bits, and Rev8WSuffix only + // handles the lower 32 bits. The decomposition is therefore only valid for + // lookup indices whose value fits in 32 bits (upper word = 0). + #[cfg(test)] + fn random_lookup_index(rng: &mut rand::rngs::StdRng) -> u128 { + rand::RngCore::next_u32(rng) as u128 + } +} diff --git a/crates/jolt-instructions/src/tables/virtual_rotr.rs b/crates/jolt-instructions/src/tables/virtual_rotr.rs new file mode 100644 index 000000000..793b66384 --- /dev/null +++ b/crates/jolt-instructions/src/tables/virtual_rotr.rs @@ -0,0 +1,86 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct VirtualRotrTable; + +impl LookupTable for VirtualRotrTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x_bits, y_bits) = uninterleave_bits(index); + + let mut prod_one_plus_y: u128 = 1; + let mut first_sum = 0; + let mut second_sum = 0; + + (0..XLEN).rev().for_each(|i| { + let x = x_bits >> i & 1; + let y = y_bits >> i & 1; + first_sum *= 1 + y; + first_sum += x * y; + second_sum += x * ((1 - y as u128) * prod_one_plus_y) as u64 * (1 << i); + prod_one_plus_y *= 1 + y as u128; + }); + + first_sum + second_sum + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + assert_eq!(r.len() % 2, 0, "r must have 
even length"); + assert_eq!(r.len() / 2, XLEN, "r must have length 2 * XLEN"); + + let mut prod_one_plus_y = F::one(); + let mut first_sum = F::zero(); + let mut second_sum = F::zero(); + + for (i, chunk) in r.chunks_exact(2).enumerate() { + let r_x = chunk[0]; + let r_y = chunk[1]; + + first_sum *= F::one() + r_y; + first_sum += r_x * r_y; + + second_sum += + r_x * (F::one() - r_y) * prod_one_plus_y * F::from_u64(1 << (XLEN - 1 - i)); + + prod_one_plus_y *= F::one() + r_y; + } + + first_sum + second_sum + } +} + +impl PrefixSuffixDecomposition for VirtualRotrTable { + fn suffixes(&self) -> Vec { + vec![ + Suffixes::RightShiftHelper, + Suffixes::RightShift, + Suffixes::LeftShift, + Suffixes::One, + ] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [right_shift_helper, right_shift, left_shift, one] = suffixes.try_into().unwrap(); + prefixes[Prefixes::RightShift] * right_shift_helper + + right_shift + + prefixes[Prefixes::LeftShiftHelper] * left_shift + + prefixes[Prefixes::LeftShift] * one + } + + #[cfg(test)] + fn random_lookup_index(rng: &mut rand::rngs::StdRng) -> u128 { + crate::tables::test_utils::gen_bitmask_lookup_index::(rng) + } +} diff --git a/crates/jolt-instructions/src/tables/virtual_rotrw.rs b/crates/jolt-instructions/src/tables/virtual_rotrw.rs new file mode 100644 index 000000000..53fd1fce7 --- /dev/null +++ b/crates/jolt-instructions/src/tables/virtual_rotrw.rs @@ -0,0 +1,86 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct VirtualRotrWTable; + +impl LookupTable for 
VirtualRotrWTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x_bits, y_bits) = uninterleave_bits(index); + + let mut prod_one_plus_y = 1; + let mut first_sum = 0; + let mut second_sum = 0; + + (0..XLEN).rev().skip(XLEN / 2).for_each(|i| { + let x = x_bits >> i & 1; + let y = y_bits >> i & 1; + first_sum *= 1 + y; + first_sum += x * y; + second_sum += x * (1 - y) * prod_one_plus_y * (1 << i); + prod_one_plus_y *= 1 + y; + }); + + first_sum + second_sum + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + assert_eq!(r.len() % 2, 0, "r must have even length"); + assert_eq!(r.len() / 2, XLEN, "r must have length 2 * XLEN"); + + let mut prod_one_plus_y = F::one(); + let mut first_sum = F::zero(); + let mut second_sum = F::zero(); + + for (i, chunk) in r.chunks_exact(2).enumerate().skip(XLEN / 2) { + let r_x = chunk[0]; + let r_y = chunk[1]; + + first_sum *= F::one() + r_y; + first_sum += r_x * r_y; + + second_sum += + r_x * (F::one() - r_y) * prod_one_plus_y * F::from_u64(1 << (XLEN - 1 - i)); + + prod_one_plus_y *= F::one() + r_y; + } + + first_sum + second_sum + } +} + +impl PrefixSuffixDecomposition for VirtualRotrWTable { + fn suffixes(&self) -> Vec { + vec![ + Suffixes::RightShiftWHelper, + Suffixes::RightShiftW, + Suffixes::LeftShiftW, + Suffixes::One, + ] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [right_shift_w_helper, right_shift_w, left_shift_w, one] = suffixes.try_into().unwrap(); + prefixes[Prefixes::RightShiftW] * right_shift_w_helper + + right_shift_w + + prefixes[Prefixes::LeftShiftWHelper] * left_shift_w + + prefixes[Prefixes::LeftShiftW] * one + } + + #[cfg(test)] + fn random_lookup_index(rng: &mut rand::rngs::StdRng) -> u128 { + crate::tables::test_utils::gen_bitmask_lookup_index::(rng) + } +} diff --git a/crates/jolt-instructions/src/tables/virtual_sra.rs 
b/crates/jolt-instructions/src/tables/virtual_sra.rs new file mode 100644 index 000000000..711ae4851 --- /dev/null +++ b/crates/jolt-instructions/src/tables/virtual_sra.rs @@ -0,0 +1,80 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct VirtualSRATable; + +impl LookupTable for VirtualSRATable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x, y) = uninterleave_bits(index); + let mut x = LookupBits::new(x as u128, XLEN); + let mut y = LookupBits::new(y as u128, XLEN); + + let sign_bit = u64::from(x.leading_ones() != 0); + let mut entry = 0; + let mut sign_extension = 0; + for i in 0..XLEN { + let x_i = x.pop_msb() as u64; + let y_i = y.pop_msb() as u64; + entry *= 1 + y_i; + entry += x_i * y_i; + if i != 0 { + sign_extension += (1 << i) * (1 - y_i); + } + } + entry + sign_bit * sign_extension + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let mut result = F::zero(); + let mut sign_extension = F::zero(); + for i in 0..XLEN { + let x_i = r[2 * i]; + let y_i = r[2 * i + 1]; + result *= F::one() + y_i; + result += x_i * y_i; + if i != 0 { + sign_extension += F::from_u64(1 << i) * (F::one() - y_i); + } + } + result + r[0] * sign_extension + } +} + +impl PrefixSuffixDecomposition for VirtualSRATable { + fn suffixes(&self) -> Vec { + vec![ + Suffixes::One, + Suffixes::RightShift, + Suffixes::RightShiftHelper, + Suffixes::SignExtension, + ] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), 
suffixes.len()); + let [one, right_shift, right_shift_helper, sign_extension] = suffixes.try_into().unwrap(); + prefixes[Prefixes::RightShift] * right_shift_helper + + right_shift + + prefixes[Prefixes::LeftOperandMsb] * sign_extension + + prefixes[Prefixes::SignExtension] * one + } + + #[cfg(test)] + fn random_lookup_index(rng: &mut rand::rngs::StdRng) -> u128 { + crate::tables::test_utils::gen_bitmask_lookup_index::(rng) + } +} diff --git a/crates/jolt-instructions/src/tables/virtual_srl.rs b/crates/jolt-instructions/src/tables/virtual_srl.rs new file mode 100644 index 000000000..21b72ce3d --- /dev/null +++ b/crates/jolt-instructions/src/tables/virtual_srl.rs @@ -0,0 +1,63 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::lookup_bits::LookupBits; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct VirtualSRLTable; + +impl LookupTable for VirtualSRLTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x, y) = uninterleave_bits(index); + let mut x = LookupBits::new(x as u128, XLEN); + let mut y = LookupBits::new(y as u128, XLEN); + + let mut entry = 0; + for _ in 0..XLEN { + let x_i = x.pop_msb(); + let y_i = y.pop_msb(); + entry *= 1 + y_i as u64; + entry += (x_i * y_i) as u64; + } + entry + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let mut result = F::zero(); + for i in 0..XLEN { + let x_i = r[2 * i]; + let y_i = r[2 * i + 1]; + result *= F::one() + y_i; + result += x_i * y_i; + } + result + } +} + +impl PrefixSuffixDecomposition for VirtualSRLTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::RightShift, 
Suffixes::RightShiftHelper] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [right_shift, right_shift_helper] = suffixes.try_into().unwrap(); + prefixes[Prefixes::RightShift] * right_shift_helper + right_shift + } + + #[cfg(test)] + fn random_lookup_index(rng: &mut rand::rngs::StdRng) -> u128 { + crate::tables::test_utils::gen_bitmask_lookup_index::(rng) + } +} diff --git a/crates/jolt-instructions/src/tables/virtual_xor_rot.rs b/crates/jolt-instructions/src/tables/virtual_xor_rot.rs new file mode 100644 index 000000000..5906d66ca --- /dev/null +++ b/crates/jolt-instructions/src/tables/virtual_xor_rot.rs @@ -0,0 +1,79 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct VirtualXORROTTable; + +impl LookupTable + for VirtualXORROTTable +{ + fn materialize_entry(&self, index: u128) -> u64 { + match XLEN { + #[cfg(test)] + 8 => { + let (x, y) = uninterleave_bits(index); + let xor_result = x as u8 ^ y as u8; + xor_result.rotate_right(ROTATION) as u64 + } + 64 => { + let (x, y) = uninterleave_bits(index); + let xor_result = x ^ y; + xor_result.rotate_right(ROTATION) + } + _ => panic!("{XLEN}-bit word size is unsupported"), + } + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let mut result = F::zero(); + for i in 0..XLEN { + let x_i = r[2 * i]; + let y_i = r[2 * i + 1]; + let rotated_position = (i + ROTATION as usize) % XLEN; + let bit_position = XLEN - 1 - rotated_position; + result += F::from_u64(1u64 << 
bit_position) + * ((F::one() - x_i) * y_i + x_i * (F::one() - y_i)); + } + result + } +} + +impl PrefixSuffixDecomposition + for VirtualXORROTTable +{ + fn suffixes(&self) -> Vec { + debug_assert_eq!(XLEN, 64); + match ROTATION { + 16 => vec![Suffixes::One, Suffixes::XorRot16], + 24 => vec![Suffixes::One, Suffixes::XorRot24], + 32 => vec![Suffixes::One, Suffixes::XorRot32], + 63 => vec![Suffixes::One, Suffixes::XorRot63], + _ => unreachable!("unsupported rotation {ROTATION}"), + } + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(XLEN, 64); + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [one, xor_rot] = suffixes.try_into().unwrap(); + match ROTATION { + 16 => prefixes[Prefixes::XorRot16] * one + xor_rot, + 24 => prefixes[Prefixes::XorRot24] * one + xor_rot, + 32 => prefixes[Prefixes::XorRot32] * one + xor_rot, + 63 => prefixes[Prefixes::XorRot63] * one + xor_rot, + _ => unreachable!("unsupported rotation {ROTATION}"), + } + } +} diff --git a/crates/jolt-instructions/src/tables/virtual_xor_rotw.rs b/crates/jolt-instructions/src/tables/virtual_xor_rotw.rs new file mode 100644 index 000000000..49ca1040e --- /dev/null +++ b/crates/jolt-instructions/src/tables/virtual_xor_rotw.rs @@ -0,0 +1,87 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct VirtualXORROTWTable; + +impl LookupTable + for VirtualXORROTWTable +{ + fn materialize_entry(&self, index: u128) -> u64 { + match XLEN { + #[cfg(test)] + 8 => { + let rotation = ROTATION as usize % (XLEN / 2); + let (x_bits, y_bits) = uninterleave_bits(index); + let x_lower = x_bits as u8 & 
0x0F; + let y_lower = y_bits as u8 & 0x0F; + let xor_result = x_lower ^ y_lower; + let rotated = + ((xor_result >> rotation) | (xor_result << (XLEN / 2 - rotation))) & 0x0F; + rotated as u64 + } + 64 => { + let (x, y) = uninterleave_bits(index); + let x_32 = x as u32; + let y_32 = y as u32; + let xor_result = x_32 ^ y_32; + xor_result.rotate_right(ROTATION) as u64 + } + _ => panic!("{XLEN}-bit word size is unsupported"), + } + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let mut result = F::zero(); + for (idx, chunk) in r.chunks_exact(2).enumerate().skip(XLEN / 2) { + let r_x = chunk[0]; + let r_y = chunk[1]; + let xor_bit = (F::one() - r_x) * r_y + r_x * (F::one() - r_y); + let position = idx - (XLEN / 2); + let mut rotated_position = (position + ROTATION as usize) % (XLEN / 2); + rotated_position = (XLEN / 2) - 1 - rotated_position; + result += F::from_u64(1u64 << rotated_position) * xor_bit; + } + result + } +} + +impl PrefixSuffixDecomposition + for VirtualXORROTWTable +{ + fn suffixes(&self) -> Vec { + debug_assert_eq!(XLEN, 64); + match ROTATION { + 7 => vec![Suffixes::One, Suffixes::XorRotW7], + 8 => vec![Suffixes::One, Suffixes::XorRotW8], + 12 => vec![Suffixes::One, Suffixes::XorRotW12], + 16 => vec![Suffixes::One, Suffixes::XorRotW16], + _ => unreachable!("unsupported rotation {ROTATION}"), + } + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(XLEN, 64); + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [one, xor_rot] = suffixes.try_into().unwrap(); + match ROTATION { + 7 => prefixes[Prefixes::XorRotW7] * one + xor_rot, + 8 => prefixes[Prefixes::XorRotW8] * one + xor_rot, + 12 => prefixes[Prefixes::XorRotW12] * one + xor_rot, + 16 => prefixes[Prefixes::XorRotW16] * one + xor_rot, + _ => unreachable!("unsupported rotation {ROTATION}"), + } + } +} diff --git 
a/crates/jolt-instructions/src/tables/word_alignment.rs b/crates/jolt-instructions/src/tables/word_alignment.rs new file mode 100644 index 000000000..c1dbe14db --- /dev/null +++ b/crates/jolt-instructions/src/tables/word_alignment.rs @@ -0,0 +1,39 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct WordAlignmentTable; + +impl LookupTable for WordAlignmentTable { + fn materialize_entry(&self, index: u128) -> u64 { + (index.is_multiple_of(4)).into() + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + let lsb0 = r[r.len() - 1]; + let lsb1 = r[r.len() - 2]; + (F::one() - lsb0) * (F::one() - lsb1) + } +} + +impl PrefixSuffixDecomposition for WordAlignmentTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::TwoLsb] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + debug_assert_eq!(self.suffixes().len(), suffixes.len()); + let [two_lsb] = suffixes.try_into().unwrap(); + prefixes[Prefixes::TwoLsb] * two_lsb + } +} diff --git a/crates/jolt-instructions/src/tables/xor.rs b/crates/jolt-instructions/src/tables/xor.rs new file mode 100644 index 000000000..4dc64848b --- /dev/null +++ b/crates/jolt-instructions/src/tables/xor.rs @@ -0,0 +1,46 @@ +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::tables::prefixes::{PrefixEval, Prefixes}; +use crate::tables::suffixes::{SuffixEval, Suffixes}; +use crate::tables::PrefixSuffixDecomposition; +use crate::traits::LookupTable; +use crate::uninterleave_bits; + +#[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +pub 
struct XorTable; + +impl LookupTable for XorTable { + fn materialize_entry(&self, index: u128) -> u64 { + let (x, y) = uninterleave_bits(index); + x ^ y + } + + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps, + { + debug_assert_eq!(r.len(), 2 * XLEN); + let mut result = F::zero(); + for i in 0..XLEN { + let x_i = r[2 * i]; + let y_i = r[2 * i + 1]; + result += F::from_u64(1u64 << (XLEN - 1 - i)) + * ((F::one() - x_i) * y_i + x_i * (F::one() - y_i)); + } + result + } +} + +impl PrefixSuffixDecomposition for XorTable { + fn suffixes(&self) -> Vec { + vec![Suffixes::One, Suffixes::Xor] + } + + fn combine(&self, prefixes: &[PrefixEval], suffixes: &[SuffixEval]) -> F { + let [one, xor] = suffixes.try_into().unwrap(); + prefixes[Prefixes::Xor] * one + xor + } +} diff --git a/crates/jolt-instructions/src/traits.rs b/crates/jolt-instructions/src/traits.rs new file mode 100644 index 000000000..435925c81 --- /dev/null +++ b/crates/jolt-instructions/src/traits.rs @@ -0,0 +1,88 @@ +//! Core traits for Jolt instruction definitions and lookup table decompositions. +//! +//! The [`Instruction`] trait defines the interface for all RISC-V instructions +//! in the Jolt zkVM. Each instruction declares its static flags, lookup table +//! association, and execution semantics. +//! +//! The [`LookupTable`] trait defines small-domain functions used by the prover, +//! including multilinear extension evaluation for the sumcheck protocol. + +use jolt_field::Field; +use std::fmt::Debug; + +use crate::challenge_ops::{ChallengeOps, FieldOps}; +use crate::flags::Flags; +use crate::tables::LookupTableKind; + +/// A RISC-V instruction: a pure function from two 64-bit operands to a 64-bit result. +/// +/// Implementations must be stateless and deterministic. The [`execute`](Instruction::execute) +/// method provides ground-truth computation using native Rust arithmetic. 
+/// [`lookup_table`](Instruction::lookup_table) declares which lookup table this +/// instruction decomposes into for the proving system. +/// +/// Every instruction also implements [`Flags`] to declare its static R1CS +/// and witness-generation flag configuration. Dynamic flags (virtual sequence +/// state, compression, rd!=0) are applied by the runtime based on trace context. +pub trait Instruction: Flags + Send + Sync + 'static { + /// Unique opcode identifying this instruction within the [`JoltInstructionSet`](crate::JoltInstructionSet). + fn opcode(&self) -> u32; + + /// Human-readable mnemonic (e.g., `"ADD"`, `"SRL"`). + fn name(&self) -> &'static str; + + /// Execute the instruction on two 64-bit operands, returning a 64-bit result. + /// + /// For RV64I/M instructions this uses wrapping arithmetic matching the RISC-V + /// specification. For W-suffix instructions, the result is sign-extended + /// from 32 bits to 64 bits. + fn execute(&self, x: u64, y: u64) -> u64; + + /// The lookup table this instruction decomposes into, if any. + /// + /// Returns `None` for instructions that don't use lookup tables (loads, stores, + /// system instructions). The prover uses this to route instruction evaluations + /// to the correct table during the instruction sumcheck. + fn lookup_table(&self) -> Option; +} + +/// A lookup table mapping interleaved operand indices to scalar values. +/// +/// Tables are materialized once during preprocessing and their multilinear +/// extensions are evaluated during the sumcheck protocol. The index space is +/// `0..2^(2*XLEN)` where `XLEN` is the word size (8 for tests, 64 for production). +/// +/// The `XLEN` const generic determines the word size. Challenge point `r` passed +/// to [`evaluate_mle`](LookupTable::evaluate_mle) has length `2 * XLEN`. 
+/// +/// The `evaluate_mle` method is generic over a challenge type `C` to support +/// smaller-than-field-element challenge values (e.g., 128-bit challenges with +/// a 254-bit field), which is a critical performance optimization for the +/// sumcheck prover. +pub trait LookupTable: Clone + Debug + Send + Sync { + /// Compute the raw table value at the given interleaved index. + /// + /// For tables with two operands, `index` contains interleaved bits of `(x, y)`. + /// Use [`uninterleave_bits`](crate::uninterleave_bits) to recover the operands. + fn materialize_entry(&self, index: u128) -> u64; + + /// Evaluate the multilinear extension of this table at challenge point `r`. + /// + /// `r` has length `2 * XLEN`. For interleaved-operand tables, even indices + /// correspond to the first operand and odd indices to the second. + /// + /// `C` is the challenge type (may be smaller than `F` for performance). + /// When `C = F`, this degenerates to standard field evaluation. + fn evaluate_mle(&self, r: &[C]) -> F + where + C: ChallengeOps, + F: Field + FieldOps; + + /// Materialize the entire table as a dense vector (test-only, XLEN=8). + #[cfg(test)] + fn materialize(&self) -> Vec { + (0..1u128 << (2 * XLEN)) + .map(|i| self.materialize_entry(i)) + .collect() + } +} diff --git a/crates/jolt-instructions/src/virtual_/advice.rs b/crates/jolt-instructions/src/virtual_/advice.rs new file mode 100644 index 000000000..dd741d66c --- /dev/null +++ b/crates/jolt-instructions/src/virtual_/advice.rs @@ -0,0 +1,47 @@ +//! Virtual advice and I/O instructions. + +use crate::opcodes; + +define_instruction!( + /// Virtual ADVICE: non-deterministic advice value. The prover supplies the value. + VirtualAdvice, opcodes::VIRTUAL_ADVICE, "VIRTUAL_ADVICE", + |_x, _y| 0, + circuit: [Advice, WriteLookupOutputToRD], + table: RangeCheck, +); + +define_instruction!( + /// Virtual ADVICE_LEN: returns the length of the advice data. 
+ VirtualAdviceLen, opcodes::VIRTUAL_ADVICE_LEN, "VIRTUAL_ADVICE_LEN", + |_x, _y| 0, + circuit: [Advice, WriteLookupOutputToRD], + table: RangeCheck, +); + +define_instruction!( + /// Virtual ADVICE_LOAD: loads a value from the advice tape. + VirtualAdviceLoad, opcodes::VIRTUAL_ADVICE_LOAD, "VIRTUAL_ADVICE_LOAD", + |_x, _y| 0, + circuit: [Advice, WriteLookupOutputToRD], + table: RangeCheck, +); + +define_instruction!( + /// Virtual HOST_IO: host I/O operation. Returns 0. + VirtualHostIO, opcodes::VIRTUAL_HOST_IO, "VIRTUAL_HOST_IO", + |_x, _y| 0, +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn advice_returns_zero() { + assert_eq!(VirtualAdvice.execute(0, 0), 0); + assert_eq!(VirtualAdviceLen.execute(0, 0), 0); + assert_eq!(VirtualAdviceLoad.execute(0, 0), 0); + assert_eq!(VirtualHostIO.execute(0, 0), 0); + } +} diff --git a/crates/jolt-instructions/src/virtual_/arithmetic.rs b/crates/jolt-instructions/src/virtual_/arithmetic.rs new file mode 100644 index 000000000..fa096af03 --- /dev/null +++ b/crates/jolt-instructions/src/virtual_/arithmetic.rs @@ -0,0 +1,83 @@ +//! Virtual arithmetic instructions used internally by the Jolt VM. + +use crate::opcodes; + +define_instruction!( + /// Virtual POW2: computes `2^y` where exponent is from lower 6 bits of `y`. + Pow2, opcodes::POW2, "POW2", + |_x, y| 1u64 << (y & 63), + circuit: [AddOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: Pow2, +); + +define_instruction!( + /// Virtual POW2I: computes `2^imm` with immediate exponent. + Pow2I, opcodes::VIRTUAL_POW2I, "POW2I", + |_x, y| 1u64 << (y & 63), + circuit: [AddOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: Pow2, +); + +define_instruction!( + /// Virtual POW2W: computes `2^(y mod 32)` for 32-bit mode. 
+ Pow2W, opcodes::VIRTUAL_POW2W, "POW2W", + |_x, y| 1u64 << (y & 31), + circuit: [AddOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: Pow2W, +); + +define_instruction!( + /// Virtual POW2IW: computes `2^(imm mod 32)` for 32-bit immediate mode. + Pow2IW, opcodes::VIRTUAL_POW2IW, "POW2IW", + |_x, y| 1u64 << (y & 31), + circuit: [AddOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: Pow2W, +); + +define_instruction!( + /// Virtual MULI: multiply by immediate. `rd = rs1 * imm`. + MulI, opcodes::VIRTUAL_MULI, "MULI", + |x, y| x.wrapping_mul(y), + circuit: [MultiplyOperands, WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: RangeCheck, +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn pow2_basic() { + assert_eq!(Pow2.execute(0, 0), 1); + assert_eq!(Pow2.execute(0, 10), 1024); + assert_eq!(Pow2.execute(0, 63), 1 << 63); + } + + #[test] + fn pow2_masks_exponent() { + assert_eq!(Pow2.execute(0, 64), 1); + } + + #[test] + fn pow2w_basic() { + assert_eq!(Pow2W.execute(0, 0), 1); + assert_eq!(Pow2W.execute(0, 31), 1 << 31); + } + + #[test] + fn pow2w_masks_to_32() { + assert_eq!(Pow2W.execute(0, 32), 1); + } + + #[test] + fn muli_basic() { + assert_eq!(MulI.execute(6, 7), 42); + assert_eq!(MulI.execute(u64::MAX, 2), u64::MAX - 1); + } +} diff --git a/crates/jolt-instructions/src/virtual_/assert.rs b/crates/jolt-instructions/src/virtual_/assert.rs new file mode 100644 index 000000000..f2809ff29 --- /dev/null +++ b/crates/jolt-instructions/src/virtual_/assert.rs @@ -0,0 +1,123 @@ +//! Virtual assertion instructions used by the Jolt VM for constraint checking. + +use crate::opcodes; + +define_instruction!( + /// Virtual ASSERT_EQ: returns 1 if operands are equal, 0 otherwise. 
+ AssertEq, opcodes::ASSERT_EQ, "ASSERT_EQ", + |x, y| u64::from(x == y), + circuit: [Assert], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: Equal, +); + +define_instruction!( + /// Virtual ASSERT_LTE: returns 1 if `x <= y` (unsigned), 0 otherwise. + AssertLte, opcodes::ASSERT_LTE, "ASSERT_LTE", + |x, y| u64::from(x <= y), + circuit: [Assert], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: UnsignedLessThanEqual, +); + +define_instruction!( + /// Virtual ASSERT_VALID_DIV0: validates division-by-zero result. + /// Returns 1 if divisor is nonzero, or if divisor is 0 and quotient is MAX. + AssertValidDiv0, opcodes::VIRTUAL_ASSERT_VALID_DIV0, "ASSERT_VALID_DIV0", + |x, y| { + if y == 0 { u64::from(x == u64::MAX) } else { 1 } + }, + circuit: [Assert], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: ValidDiv0, +); + +define_instruction!( + /// Virtual ASSERT_VALID_UNSIGNED_REMAINDER: validates unsigned remainder. + /// Returns 1 if divisor is 0 or remainder < divisor. + AssertValidUnsignedRemainder, opcodes::VIRTUAL_ASSERT_VALID_UNSIGNED_REMAINDER, "ASSERT_VALID_UNSIGNED_REMAINDER", + |x, y| { + if y == 0 { 1 } else { u64::from(x < y) } + }, + circuit: [Assert], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: ValidUnsignedRemainder, +); + +define_instruction!( + /// Virtual ASSERT_MULU_NO_OVERFLOW: checks unsigned multiply doesn't overflow. + /// Returns 1 if the upper XLEN bits of `x * y` are all zero. + AssertMulUNoOverflow, opcodes::VIRTUAL_ASSERT_MULU_NO_OVERFLOW, "ASSERT_MULU_NO_OVERFLOW", + |x, y| { + let product = (x as u128) * (y as u128); + u64::from((product >> 64) == 0) + }, + circuit: [Assert], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: MulUNoOverflow, +); + +define_instruction!( + /// Virtual ASSERT_WORD_ALIGNMENT: checks value is 4-byte aligned. 
+ AssertWordAlignment, opcodes::VIRTUAL_ASSERT_WORD_ALIGNMENT, "ASSERT_WORD_ALIGNMENT", + |x, _y| u64::from(x.is_multiple_of(4)), + circuit: [Assert], + instruction: [LeftOperandIsRs1Value], + table: WordAlignment, +); + +define_instruction!( + /// Virtual ASSERT_HALFWORD_ALIGNMENT: checks value is 2-byte aligned. + AssertHalfwordAlignment, opcodes::VIRTUAL_ASSERT_HALFWORD_ALIGNMENT, "ASSERT_HALFWORD_ALIGNMENT", + |x, _y| u64::from(x.is_multiple_of(2)), + circuit: [Assert], + instruction: [LeftOperandIsRs1Value], + table: HalfwordAlignment, +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn assert_eq_basic() { + assert_eq!(AssertEq.execute(5, 5), 1); + assert_eq!(AssertEq.execute(5, 6), 0); + } + + #[test] + fn assert_lte_basic() { + assert_eq!(AssertLte.execute(3, 5), 1); + assert_eq!(AssertLte.execute(5, 5), 1); + assert_eq!(AssertLte.execute(6, 5), 0); + } + + #[test] + fn assert_valid_div0() { + assert_eq!(AssertValidDiv0.execute(u64::MAX, 0), 1); + assert_eq!(AssertValidDiv0.execute(42, 0), 0); + assert_eq!(AssertValidDiv0.execute(42, 3), 1); + } + + #[test] + fn assert_valid_unsigned_remainder() { + assert_eq!(AssertValidUnsignedRemainder.execute(2, 5), 1); + assert_eq!(AssertValidUnsignedRemainder.execute(5, 5), 0); + assert_eq!(AssertValidUnsignedRemainder.execute(0, 0), 1); + } + + #[test] + fn assert_word_alignment() { + assert_eq!(AssertWordAlignment.execute(0, 0), 1); + assert_eq!(AssertWordAlignment.execute(4, 0), 1); + assert_eq!(AssertWordAlignment.execute(3, 0), 0); + } + + #[test] + fn assert_halfword_alignment() { + assert_eq!(AssertHalfwordAlignment.execute(0, 0), 1); + assert_eq!(AssertHalfwordAlignment.execute(2, 0), 1); + assert_eq!(AssertHalfwordAlignment.execute(1, 0), 0); + } +} diff --git a/crates/jolt-instructions/src/virtual_/bitwise.rs b/crates/jolt-instructions/src/virtual_/bitwise.rs new file mode 100644 index 000000000..663f885cc --- /dev/null +++ 
b/crates/jolt-instructions/src/virtual_/bitwise.rs @@ -0,0 +1,40 @@ +//! Virtual bitwise instructions used internally by the Jolt VM. + +use crate::opcodes; + +define_instruction!( + /// Virtual MOVSIGN: returns all-ones if `x` is negative (signed), otherwise zero. + MovSign, opcodes::MOVSIGN, "MOVSIGN", + |x, _y| if (x as i64) < 0 { u64::MAX } else { 0 }, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: Movsign, +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn movsign_negative() { + let neg = (-1i64) as u64; + assert_eq!(MovSign.execute(neg, 42), u64::MAX); + } + + #[test] + fn movsign_positive() { + assert_eq!(MovSign.execute(1, 42), 0); + } + + #[test] + fn movsign_zero() { + assert_eq!(MovSign.execute(0, 42), 0); + } + + #[test] + fn movsign_min() { + let min = i64::MIN as u64; + assert_eq!(MovSign.execute(min, 99), u64::MAX); + } +} diff --git a/crates/jolt-instructions/src/virtual_/byte.rs b/crates/jolt-instructions/src/virtual_/byte.rs new file mode 100644 index 000000000..58432aad3 --- /dev/null +++ b/crates/jolt-instructions/src/virtual_/byte.rs @@ -0,0 +1,31 @@ +//! Virtual byte manipulation instructions. + +use crate::opcodes; + +define_instruction!( + /// Virtual REV8W: byte-reverse within the lower 32 bits. 
+ VirtualRev8W, opcodes::VIRTUAL_REV8W, "VIRTUAL_REV8W", + |x, _y| { + let w = x as u32; + w.swap_bytes() as u64 + }, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value], + table: VirtualRev8W, +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn rev8w_basic() { + assert_eq!(VirtualRev8W.execute(0x0102_0304, 0), 0x0403_0201); + } + + #[test] + fn rev8w_zero() { + assert_eq!(VirtualRev8W.execute(0, 0), 0); + } +} diff --git a/crates/jolt-instructions/src/virtual_/division.rs b/crates/jolt-instructions/src/virtual_/division.rs new file mode 100644 index 000000000..38015325d --- /dev/null +++ b/crates/jolt-instructions/src/virtual_/division.rs @@ -0,0 +1,75 @@ +//! Virtual division-related instructions. + +use crate::opcodes; + +/// Returns 1 if this is the signed division overflow case (MIN / -1), else returns the divisor. +#[inline] +fn change_divisor_64(dividend: u64, divisor: u64) -> u64 { + if (dividend as i64) == i64::MIN && (divisor as i64) == -1 { + 1 + } else { + divisor + } +} + +/// 32-bit version of [`change_divisor_64`]. +#[inline] +fn change_divisor_32(dividend: u64, divisor: u64) -> u64 { + if dividend as u32 == i32::MIN as u32 && divisor as u32 == u32::MAX { + 1 + } else { + divisor + } +} + +define_instruction!( + /// Virtual CHANGE_DIVISOR: transforms divisor for signed division overflow. + /// Returns the divisor unchanged, unless dividend == MIN && divisor == -1, + /// in which case returns 1 to avoid overflow. + VirtualChangeDivisor, opcodes::VIRTUAL_CHANGE_DIVISOR, "VIRTUAL_CHANGE_DIVISOR", + |x, y| change_divisor_64(x, y), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: VirtualChangeDivisor, +); + +define_instruction!( + /// Virtual CHANGE_DIVISOR_W: 32-bit version of change divisor. 
+ VirtualChangeDivisorW, opcodes::VIRTUAL_CHANGE_DIVISOR_W, "VIRTUAL_CHANGE_DIVISOR_W", + |x, y| change_divisor_32(x, y), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: VirtualChangeDivisorW, +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn change_divisor_normal() { + assert_eq!(VirtualChangeDivisor.execute(10, 3), 3); + } + + #[test] + fn change_divisor_overflow() { + assert_eq!( + VirtualChangeDivisor.execute(i64::MIN as u64, (-1i64) as u64), + 1 + ); + } + + #[test] + fn change_divisor_w_normal() { + assert_eq!(VirtualChangeDivisorW.execute(10, 3), 3); + } + + #[test] + fn change_divisor_w_overflow() { + assert_eq!( + VirtualChangeDivisorW.execute(i32::MIN as u64, (-1i32) as u64), + 1 + ); + } +} diff --git a/crates/jolt-instructions/src/virtual_/extension.rs b/crates/jolt-instructions/src/virtual_/extension.rs new file mode 100644 index 000000000..e90e26547 --- /dev/null +++ b/crates/jolt-instructions/src/virtual_/extension.rs @@ -0,0 +1,48 @@ +//! Virtual sign/zero extension instructions. + +use crate::opcodes; + +define_instruction!( + /// Virtual SIGN_EXTEND_WORD: sign-extends a 32-bit value to 64 bits. + VirtualSignExtendWord, opcodes::VIRTUAL_SIGN_EXTEND_WORD, "VIRTUAL_SIGN_EXTEND_WORD", + |x, _y| (x as i32) as i64 as u64, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value], + table: RangeCheck, +); + +define_instruction!( + /// Virtual ZERO_EXTEND_WORD: zero-extends a 32-bit value to 64 bits. 
+ VirtualZeroExtendWord, opcodes::VIRTUAL_ZERO_EXTEND_WORD, "VIRTUAL_ZERO_EXTEND_WORD", + |x, _y| x & 0xFFFF_FFFF, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value], + table: RangeCheck, +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn sign_extend_negative() { + assert_eq!( + VirtualSignExtendWord.execute(0x8000_0000, 0), + 0xFFFF_FFFF_8000_0000 + ); + } + + #[test] + fn sign_extend_positive() { + assert_eq!(VirtualSignExtendWord.execute(0x7FFF_FFFF, 0), 0x7FFF_FFFF); + } + + #[test] + fn zero_extend() { + assert_eq!( + VirtualZeroExtendWord.execute(0xFFFF_FFFF_8000_0000, 0), + 0x8000_0000 + ); + } +} diff --git a/crates/jolt-instructions/src/virtual_/mod.rs b/crates/jolt-instructions/src/virtual_/mod.rs new file mode 100644 index 000000000..ad06de46c --- /dev/null +++ b/crates/jolt-instructions/src/virtual_/mod.rs @@ -0,0 +1,15 @@ +//! Virtual instructions used internally by the Jolt VM. +//! +//! These do not correspond directly to RISC-V ISA instructions but are +//! needed by the proving system for constraint checking, arithmetic helpers, +//! and instruction decompositions. + +pub mod advice; +pub mod arithmetic; +pub mod assert; +pub mod bitwise; +pub mod byte; +pub mod division; +pub mod extension; +pub mod shift; +pub mod xor_rotate; diff --git a/crates/jolt-instructions/src/virtual_/shift.rs b/crates/jolt-instructions/src/virtual_/shift.rs new file mode 100644 index 000000000..1aea05ecf --- /dev/null +++ b/crates/jolt-instructions/src/virtual_/shift.rs @@ -0,0 +1,120 @@ +//! Virtual shift decomposition instructions. +//! +//! The RV64 shift instructions (SRL, SRA, etc.) are decomposed into +//! virtual sequences that use specialized lookup tables for the sumcheck prover. + +use crate::opcodes; + +/// Computes the bitmask for a right-shift: `((1 << (64 - shift)) - 1) << shift`. +/// Returns `u64::MAX` when `shift == 0`. 
+#[inline] +fn shift_right_bitmask(shift_amount: u64) -> u64 { + let shift = shift_amount & 63; + if shift == 0 { + u64::MAX + } else { + (((1u128 << (64 - shift)) - 1) as u64) << shift + } +} + +define_instruction!( + /// Virtual SRL: logical right shift decomposition. + VirtualSrl, opcodes::VIRTUAL_SRL, "VIRTUAL_SRL", + |x, y| x >> (y & 63), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: VirtualSRL, +); + +define_instruction!( + /// Virtual SRLI: logical right shift by immediate decomposition. + VirtualSrli, opcodes::VIRTUAL_SRLI, "VIRTUAL_SRLI", + |x, y| x >> (y & 63), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: VirtualSRL, +); + +define_instruction!( + /// Virtual SRA: arithmetic right shift decomposition. + VirtualSra, opcodes::VIRTUAL_SRA, "VIRTUAL_SRA", + |x, y| ((x as i64) >> (y & 63)) as u64, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: VirtualSRA, +); + +define_instruction!( + /// Virtual SRAI: arithmetic right shift by immediate decomposition. + VirtualSrai, opcodes::VIRTUAL_SRAI, "VIRTUAL_SRAI", + |x, y| ((x as i64) >> (y & 63)) as u64, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: VirtualSRA, +); + +define_instruction!( + /// Virtual SHIFT_RIGHT_BITMASK: bitmask for right-shift amount. + VirtualShiftRightBitmask, opcodes::VIRTUAL_SHIFT_RIGHT_BITMASK, "VIRTUAL_SHIFT_RIGHT_BITMASK", + |_x, y| shift_right_bitmask(y), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: ShiftRightBitmask, +); + +define_instruction!( + /// Virtual SHIFT_RIGHT_BITMASKI: bitmask for right-shift by immediate. 
+ VirtualShiftRightBitmaski, opcodes::VIRTUAL_SHIFT_RIGHT_BITMASKI, "VIRTUAL_SHIFT_RIGHT_BITMASKI", + |_x, y| shift_right_bitmask(y), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: ShiftRightBitmask, +); + +define_instruction!( + /// Virtual ROTRI: rotate right by immediate. + VirtualRotri, opcodes::VIRTUAL_ROTRI, "VIRTUAL_ROTRI", + |x, y| x.rotate_right((y & 63) as u32), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: VirtualROTR, +); + +define_instruction!( + /// Virtual ROTRIW: 32-bit rotate right by immediate, sign-extended. + VirtualRotriw, opcodes::VIRTUAL_ROTRIW, "VIRTUAL_ROTRIW", + |x, y| (x as u32).rotate_right((y & 31) as u32) as i32 as i64 as u64, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsImm], + table: VirtualROTRW, +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn virtual_srl_basic() { + assert_eq!(VirtualSrl.execute(1024, 10), 1); + assert_eq!(VirtualSrl.execute(u64::MAX, 63), 1); + } + + #[test] + fn virtual_sra_sign_extends() { + let neg = (-1024i64) as u64; + assert_eq!(VirtualSra.execute(neg, 4), (-64i64) as u64); + } + + #[test] + fn virtual_shift_right_bitmask() { + assert_eq!(VirtualShiftRightBitmask.execute(0, 0), u64::MAX); + assert_eq!(VirtualShiftRightBitmask.execute(0, 1), u64::MAX - 1); + } + + #[test] + fn virtual_rotri_basic() { + assert_eq!(VirtualRotri.execute(1, 1), 1u64 << 63); + assert_eq!(VirtualRotri.execute(0xFF, 4), 0xF000_0000_0000_000F); + } +} diff --git a/crates/jolt-instructions/src/virtual_/xor_rotate.rs b/crates/jolt-instructions/src/virtual_/xor_rotate.rs new file mode 100644 index 000000000..e18ebbc90 --- /dev/null +++ b/crates/jolt-instructions/src/virtual_/xor_rotate.rs @@ -0,0 +1,103 @@ +//! Virtual XOR-rotate instructions for SHA hash functions. 
+ +use crate::opcodes; + +define_instruction!( + /// Virtual XOR then rotate right by 32 bits. + VirtualXorRot32, opcodes::VIRTUAL_XORROT32, "VIRTUAL_XORROT32", + |x, y| (x ^ y).rotate_right(32), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: VirtualXORROT32, +); + +define_instruction!( + /// Virtual XOR then rotate right by 24 bits. + VirtualXorRot24, opcodes::VIRTUAL_XORROT24, "VIRTUAL_XORROT24", + |x, y| (x ^ y).rotate_right(24), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: VirtualXORROT24, +); + +define_instruction!( + /// Virtual XOR then rotate right by 16 bits. + VirtualXorRot16, opcodes::VIRTUAL_XORROT16, "VIRTUAL_XORROT16", + |x, y| (x ^ y).rotate_right(16), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: VirtualXORROT16, +); + +define_instruction!( + /// Virtual XOR then rotate right by 63 bits. + VirtualXorRot63, opcodes::VIRTUAL_XORROT63, "VIRTUAL_XORROT63", + |x, y| (x ^ y).rotate_right(63), + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: VirtualXORROT63, +); + +define_instruction!( + /// Virtual XOR then rotate right word (32-bit) by 16 bits. + VirtualXorRotW16, opcodes::VIRTUAL_XORROTW16, "VIRTUAL_XORROTW16", + |x, y| { + let val = (x as u32) ^ (y as u32); + val.rotate_right(16) as i32 as i64 as u64 + }, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: VirtualXORROTW16, +); + +define_instruction!( + /// Virtual XOR then rotate right word by 12 bits. 
+ VirtualXorRotW12, opcodes::VIRTUAL_XORROTW12, "VIRTUAL_XORROTW12", + |x, y| { + let val = (x as u32) ^ (y as u32); + val.rotate_right(12) as i32 as i64 as u64 + }, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: VirtualXORROTW12, +); + +define_instruction!( + /// Virtual XOR then rotate right word by 8 bits. + VirtualXorRotW8, opcodes::VIRTUAL_XORROTW8, "VIRTUAL_XORROTW8", + |x, y| { + let val = (x as u32) ^ (y as u32); + val.rotate_right(8) as i32 as i64 as u64 + }, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: VirtualXORROTW8, +); + +define_instruction!( + /// Virtual XOR then rotate right word by 7 bits. + VirtualXorRotW7, opcodes::VIRTUAL_XORROTW7, "VIRTUAL_XORROTW7", + |x, y| { + let val = (x as u32) ^ (y as u32); + val.rotate_right(7) as i32 as i64 as u64 + }, + circuit: [WriteLookupOutputToRD], + instruction: [LeftOperandIsRs1Value, RightOperandIsRs2Value], + table: VirtualXORROTW7, +); + +#[cfg(test)] +mod tests { + use super::*; + use crate::Instruction; + + #[test] + fn xor_rot32() { + assert_eq!(VirtualXorRot32.execute(0xFF, 0), 0xFF_0000_0000); + } + + #[test] + fn xor_rot63() { + assert_eq!(VirtualXorRot63.execute(1, 0), 2); + } +} diff --git a/crates/jolt-instructions/tests/instruction_set.rs b/crates/jolt-instructions/tests/instruction_set.rs new file mode 100644 index 000000000..887ce05e5 --- /dev/null +++ b/crates/jolt-instructions/tests/instruction_set.rs @@ -0,0 +1,403 @@ +//! Integration tests for the jolt-instructions crate. +//! +//! Exercises the JoltInstructionSet registry, instruction execution semantics, +//! flag consistency, and bit-interleaving utilities. + +use jolt_instructions::{ + interleave_bits, opcodes, uninterleave_bits, CircuitFlags, JoltInstructionSet, + NUM_CIRCUIT_FLAGS, NUM_INSTRUCTION_FLAGS, +}; + +// Registry completeness + +/// Every opcode 0..COUNT maps to an instruction with matching opcode. 
+#[test] +fn registry_all_opcodes_present() { + let set = JoltInstructionSet::new(); + assert_eq!(set.len(), opcodes::COUNT as usize); + + for op in 0..opcodes::COUNT { + let instr = set + .instruction(op) + .unwrap_or_else(|| panic!("opcode {op} missing from registry")); + assert_eq!(instr.opcode(), op, "opcode mismatch for {}", instr.name()); + } +} + +/// All instruction names are unique. +#[test] +fn registry_unique_names() { + let set = JoltInstructionSet::new(); + let mut names: Vec<&str> = set.iter().map(|i| i.name()).collect(); + let original_len = names.len(); + names.sort_unstable(); + names.dedup(); + assert_eq!( + names.len(), + original_len, + "duplicate instruction names found" + ); +} + +/// Out-of-range opcode returns None. +#[test] +fn registry_out_of_range() { + let set = JoltInstructionSet::new(); + assert!(set.instruction(opcodes::COUNT).is_none()); + assert!(set.instruction(u32::MAX).is_none()); +} + +// Arithmetic instruction semantics + +#[test] +fn add_basic_and_overflow() { + let set = JoltInstructionSet::new(); + let add = set.instruction(opcodes::ADD).unwrap(); + assert_eq!(add.execute(3, 5), 8); + assert_eq!(add.execute(u64::MAX, 1), 0); // wrapping + assert_eq!(add.execute(0, 0), 0); +} + +#[test] +fn sub_basic_and_underflow() { + let set = JoltInstructionSet::new(); + let sub = set.instruction(opcodes::SUB).unwrap(); + assert_eq!(sub.execute(10, 3), 7); + assert_eq!(sub.execute(0, 1), u64::MAX); // wrapping +} + +#[test] +fn mul_basic_and_overflow() { + let set = JoltInstructionSet::new(); + let mul = set.instruction(opcodes::MUL).unwrap(); + assert_eq!(mul.execute(6, 7), 42); + assert_eq!(mul.execute(u64::MAX, 2), u64::MAX.wrapping_mul(2)); + assert_eq!(mul.execute(0, u64::MAX), 0); +} + +// Division edge cases + +#[test] +fn div_by_zero_returns_max() { + let set = JoltInstructionSet::new(); + let div = set.instruction(opcodes::DIV).unwrap(); + // RISC-V spec: signed div by zero returns -1 (all bits set) + assert_eq!(div.execute(42, 
0), u64::MAX); +} + +#[test] +fn divu_by_zero_returns_max() { + let set = JoltInstructionSet::new(); + let divu = set.instruction(opcodes::DIVU).unwrap(); + assert_eq!(divu.execute(42, 0), u64::MAX); +} + +#[test] +fn rem_by_zero_returns_dividend() { + let set = JoltInstructionSet::new(); + let rem = set.instruction(opcodes::REM).unwrap(); + assert_eq!(rem.execute(42, 0), 42); +} + +#[test] +fn remu_by_zero_returns_dividend() { + let set = JoltInstructionSet::new(); + let remu = set.instruction(opcodes::REMU).unwrap(); + assert_eq!(remu.execute(42, 0), 42); +} + +#[test] +fn div_normal() { + let set = JoltInstructionSet::new(); + let div = set.instruction(opcodes::DIV).unwrap(); + // Signed division: interpret as i64 + let neg5 = (-5i64) as u64; + let result = div.execute(neg5, 2); + assert_eq!(result as i64, -2); // truncates toward zero +} + +// Shift edge cases + +#[test] +fn sll_large_shift() { + let set = JoltInstructionSet::new(); + let sll = set.instruction(opcodes::SLL).unwrap(); + // Only lower 6 bits of shift used (mod 64) + assert_eq!(sll.execute(1, 0), 1); + assert_eq!(sll.execute(1, 63), 1u64 << 63); + assert_eq!(sll.execute(1, 64), 1); // 64 mod 64 = 0 +} + +#[test] +fn srl_basic() { + let set = JoltInstructionSet::new(); + let srl = set.instruction(opcodes::SRL).unwrap(); + assert_eq!(srl.execute(0xFF00, 8), 0xFF); + assert_eq!(srl.execute(u64::MAX, 63), 1); +} + +#[test] +fn sra_sign_extension() { + let set = JoltInstructionSet::new(); + let sra = set.instruction(opcodes::SRA).unwrap(); + let neg = (-16i64) as u64; + let result = sra.execute(neg, 2); + assert_eq!(result as i64, -4); // sign-extended +} + +// Branch instruction semantics + +#[test] +fn branch_eq_ne() { + let set = JoltInstructionSet::new(); + let beq = set.instruction(opcodes::BEQ).unwrap(); + let bne = set.instruction(opcodes::BNE).unwrap(); + + assert_eq!(beq.execute(5, 5), 1); + assert_eq!(beq.execute(5, 6), 0); + assert_eq!(bne.execute(5, 5), 0); + assert_eq!(bne.execute(5, 6), 
1); +} + +#[test] +fn branch_signed_comparison() { + let set = JoltInstructionSet::new(); + let blt = set.instruction(opcodes::BLT).unwrap(); + let bge = set.instruction(opcodes::BGE).unwrap(); + + let neg1 = (-1i64) as u64; + // -1 < 0 is true (signed) + assert_eq!(blt.execute(neg1, 0), 1); + assert_eq!(bge.execute(neg1, 0), 0); + // 0 < -1 is false (signed) + assert_eq!(blt.execute(0, neg1), 0); + assert_eq!(bge.execute(0, neg1), 1); +} + +#[test] +fn branch_unsigned_comparison() { + let set = JoltInstructionSet::new(); + let bltu = set.instruction(opcodes::BLTU).unwrap(); + let bgeu = set.instruction(opcodes::BGEU).unwrap(); + + let large = u64::MAX; // unsigned: largest value + // MAX < 0 is false (unsigned) + assert_eq!(bltu.execute(large, 0), 0); + assert_eq!(bgeu.execute(large, 0), 1); + // 0 < MAX is true (unsigned) + assert_eq!(bltu.execute(0, large), 1); + assert_eq!(bgeu.execute(0, large), 0); +} + +// W-suffix sign extension + +#[test] +fn addw_sign_extends() { + let set = JoltInstructionSet::new(); + let addw = set.instruction(opcodes::ADDW).unwrap(); + + // 0x7FFF_FFFF + 1 = 0x8000_0000 → sign-extended to 0xFFFF_FFFF_8000_0000 + let result = addw.execute(0x7FFF_FFFF, 1); + assert_eq!(result as i64, -2_147_483_648_i64); // i32::MIN sign-extended + + // Normal case + assert_eq!(addw.execute(3, 5), 8); +} + +#[test] +fn subw_sign_extends() { + let set = JoltInstructionSet::new(); + let subw = set.instruction(opcodes::SUBW).unwrap(); + + // 0 - 1 in 32-bit = 0xFFFF_FFFF → sign-extended to 0xFFFF_FFFF_FFFF_FFFF = -1 + let result = subw.execute(0, 1); + assert_eq!(result as i64, -1); +} + +// Load/store masking + +#[test] +fn load_byte_sign_extend() { + let set = JoltInstructionSet::new(); + let lb = set.instruction(opcodes::LB).unwrap(); + let lbu = set.instruction(opcodes::LBU).unwrap(); + + // 0xFF → LB sign-extends to -1, LBU zero-extends to 255 + assert_eq!(lb.execute(0xFF, 0) as i64, -1); + assert_eq!(lbu.execute(0xFF, 0), 0xFF); +} + +#[test] +fn 
store_masking() { + let set = JoltInstructionSet::new(); + let sb = set.instruction(opcodes::SB).unwrap(); + let sh = set.instruction(opcodes::SH).unwrap(); + let sw = set.instruction(opcodes::SW).unwrap(); + + assert_eq!(sb.execute(0xDEAD_BEEF, 0), 0xEF); + assert_eq!(sh.execute(0xDEAD_BEEF, 0), 0xBEEF); + assert_eq!(sw.execute(0xDEAD_BEEF, 0), 0xDEAD_BEEF); +} + +// Compare instructions + +#[test] +fn slt_signed() { + let set = JoltInstructionSet::new(); + let slt = set.instruction(opcodes::SLT).unwrap(); + let neg1 = (-1i64) as u64; + assert_eq!(slt.execute(neg1, 0), 1); // -1 < 0 signed + assert_eq!(slt.execute(0, neg1), 0); + assert_eq!(slt.execute(5, 5), 0); +} + +#[test] +fn sltu_unsigned() { + let set = JoltInstructionSet::new(); + let sltu = set.instruction(opcodes::SLTU).unwrap(); + assert_eq!(sltu.execute(0, 1), 1); + assert_eq!(sltu.execute(1, 0), 0); + assert_eq!(sltu.execute(u64::MAX, 0), 0); // MAX is largest unsigned +} + +// Logic instructions + +#[test] +fn bitwise_operations() { + let set = JoltInstructionSet::new(); + let and = set.instruction(opcodes::AND).unwrap(); + let or = set.instruction(opcodes::OR).unwrap(); + let xor = set.instruction(opcodes::XOR).unwrap(); + + assert_eq!(and.execute(0xFF, 0x0F), 0x0F); + assert_eq!(or.execute(0xF0, 0x0F), 0xFF); + assert_eq!(xor.execute(0xFF, 0xFF), 0x00); + assert_eq!(xor.execute(0xFF, 0x00), 0xFF); +} + +// Flag consistency + +/// All circuit flag arrays have the correct length. +#[test] +fn circuit_flag_dimensions() { + let set = JoltInstructionSet::new(); + for instr in set.iter() { + let flags = instr.circuit_flags(); + assert_eq!( + flags.len(), + NUM_CIRCUIT_FLAGS, + "circuit flags wrong size for {}", + instr.name() + ); + } +} + +/// All instruction flag arrays have the correct length. 
+#[test] +fn instruction_flag_dimensions() { + let set = JoltInstructionSet::new(); + for instr in set.iter() { + let flags = instr.instruction_flags(); + assert_eq!( + flags.len(), + NUM_INSTRUCTION_FLAGS, + "instruction flags wrong size for {}", + instr.name() + ); + } +} + +/// ADD has AddOperands and WriteLookupOutputToRD flags. +#[test] +fn add_has_expected_flags() { + let set = JoltInstructionSet::new(); + let add = set.instruction(opcodes::ADD).unwrap(); + let cf = add.circuit_flags(); + assert!(cf[CircuitFlags::AddOperands as usize]); + assert!(cf[CircuitFlags::WriteLookupOutputToRD as usize]); + assert!(!cf[CircuitFlags::Load as usize]); + assert!(!cf[CircuitFlags::Store as usize]); +} + +/// Loads have Load flag, stores have Store flag. +#[test] +fn load_store_flags() { + let set = JoltInstructionSet::new(); + + let lw = set.instruction(opcodes::LW).unwrap(); + assert!(lw.circuit_flags()[CircuitFlags::Load as usize]); + assert!(!lw.circuit_flags()[CircuitFlags::Store as usize]); + + let sw = set.instruction(opcodes::SW).unwrap(); + assert!(sw.circuit_flags()[CircuitFlags::Store as usize]); + assert!(!sw.circuit_flags()[CircuitFlags::Load as usize]); +} + +/// Jump instructions have Jump flag. +#[test] +fn jump_flags() { + let set = JoltInstructionSet::new(); + let jal = set.instruction(opcodes::JAL).unwrap(); + assert!(jal.circuit_flags()[CircuitFlags::Jump as usize]); +} + +// Lookup table consistency + +/// Instructions with lookup tables have non-None table kinds. +/// Instructions without (loads, stores, system) have None. 
+#[test] +fn lookup_table_assignment() { + let set = JoltInstructionSet::new(); + + // ADD should have a lookup table (RangeCheck) + let add = set.instruction(opcodes::ADD).unwrap(); + assert!( + add.lookup_table().is_some(), + "ADD should have a lookup table" + ); + + // LW should not have a lookup table + let lw = set.instruction(opcodes::LW).unwrap(); + assert!( + lw.lookup_table().is_none(), + "LW should not have a lookup table" + ); + + // ECALL should not have a lookup table + let ecall = set.instruction(opcodes::ECALL).unwrap(); + assert!( + ecall.lookup_table().is_none(), + "ECALL should not have a lookup table" + ); +} + +// Bit interleaving + +/// Round-trip: uninterleave(interleave(x, y)) == (x, y). +#[test] +fn interleave_round_trip() { + let test_values = [0u64, 1, 0xFF, 0xFFFF, 0xDEAD_BEEF, u64::MAX, u64::MAX / 2]; + for &x in &test_values { + for &y in &test_values { + let interleaved = interleave_bits(x, y); + let (rx, ry) = uninterleave_bits(interleaved); + assert_eq!((rx, ry), (x, y), "round-trip failed for x={x:#x}, y={y:#x}"); + } + } +} + +/// Single-bit positions are correctly placed. +#[test] +fn interleave_single_bits() { + // The exact bit placement depends on the MSB/LSB interleaving convention, + // so assert the invariant that matters regardless of convention: + // every single set bit survives an interleave/uninterleave round-trip. 
+ for bit in 0..64 { + let x = 1u64 << bit; + let (rx, _) = uninterleave_bits(interleave_bits(x, 0)); + assert_eq!(rx, x, "x single bit {bit} failed"); + + let (_, ry) = uninterleave_bits(interleave_bits(0, x)); + assert_eq!(ry, x, "y single bit {bit} failed"); + } +} diff --git a/crates/jolt-poly/Cargo.toml b/crates/jolt-poly/Cargo.toml new file mode 100644 index 000000000..91d7a833c --- /dev/null +++ b/crates/jolt-poly/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "jolt-poly" +version = "0.1.0" +edition = "2021" +license = "MIT OR Apache-2.0" +authors = ["Jolt Contributors"] +repository = "https://github.com/a16z/jolt" +description = "Polynomial types and operations for the Jolt zkVM" +keywords = ["cryptography", "polynomial", "multilinear", "sumcheck", "zkvm"] +categories = ["cryptography", "mathematics"] + +[lints] +workspace = true + +[dependencies] +jolt-field = { path = "../jolt-field" } +serde = { workspace = true, features = ["derive", "alloc"] } +tracing.workspace = true +rayon = { workspace = true, optional = true } +rand_core = { workspace = true } +num-traits = { workspace = true } + +[dev-dependencies] +rand = { workspace = true } +rand_chacha = { workspace = true } +bincode = { workspace = true } +criterion = { workspace = true } + +[[bench]] +name = "poly_ops" +harness = false + +[package.metadata.cargo-machete] +ignored = ["rand"] + +[features] +default = ["parallel"] +parallel = ["rayon"] diff --git a/crates/jolt-poly/README.md b/crates/jolt-poly/README.md new file mode 100644 index 000000000..39fe4c6b9 --- /dev/null +++ b/crates/jolt-poly/README.md @@ -0,0 +1,55 @@ +# jolt-poly + +Polynomial types and operations for the Jolt zkVM. + +Part of the [Jolt](https://github.com/a16z/jolt) zkVM. + +## Overview + +This crate provides multilinear and univariate polynomial representations used throughout Jolt's sumcheck-based proving system. 
Polynomials are represented by their evaluations over the Boolean hypercube, with specialized compact representations for small-scalar coefficients. + +## Public API + +### Core Traits + +- **`MultilinearEvaluation`** -- Point evaluation interface for multilinear polynomials. Methods: `num_vars()`, `len()`, `evaluate(point)`. +- **`MultilinearBinding`** -- In-place variable binding for sumcheck. Method: `bind(scalar)`. +- **`UnivariatePolynomial`** -- Shared interface for univariate types. Method: `degree()`. + +### Polynomial Types + +- **`Polynomial`** -- Full evaluation table stored as `Vec`. Supports in-place variable binding, evaluation, arithmetic operators. +- **`Polynomial` (compact mode)** -- When `T` is a small primitive (`bool`, `u8`, etc.), stores evaluations natively and promotes to field on demand. Up to 32x memory reduction. +- **`UnivariatePoly`** -- Coefficient-form univariate polynomial with Lagrange interpolation and compression. +- **`CompressedPoly`** -- Compressed univariate with linear term omitted (saves one field element per sumcheck round). +- **`EqPolynomial`** -- Equality polynomial `eq(x, r)`. Materializes all `2^n` evaluations via bottom-up doubling. +- **`EqPlusOnePolynomial`** -- Successor polynomial `eq+1(x, y)` evaluating to 1 when `y = x + 1`. Used in Spartan shift sumcheck. +- **`EqPlusOnePrefixSuffix`** -- Prefix-suffix decomposition of `eq+1` for sqrt-sized sumcheck buffers. +- **`IdentityPolynomial`** -- Maps hypercube points to their integer index. +- **`LtPolynomial`** -- Less-than polynomial `LT(x, r)` with split optimization for sqrt-sized sumcheck buffers. Used in register/RAM value evaluation. + +### Streaming and Sparse Access + +- **`MultilinearPoly`** -- Core trait for multilinear polynomial access: evaluation, row iteration, fold, sparsity hints. +- **`RlcSource`** -- Lazy random linear combination of multiple `MultilinearPoly` sources. 
+- **`OneHotPolynomial`** -- Sparse polynomial where each row has at most one nonzero entry (value 1). Enables O(T) PCS commit via generator lookup. + +### Binding and Evaluation + +- **`BindingOrder`** -- Controls the order in which variables are bound during sumcheck (MSB-first vs LSB-first). + +### Utility Modules + +- **`lagrange`** -- Lagrange interpolation, symmetric power sums, polynomial multiplication, and Newton-form interpolation over integer domains. +- **`math`** -- Bit-manipulation utilities on `usize` via the `Math` trait (`pow2`, `log_2`). +- **`thread`** -- Threading utilities: `drop_in_background_thread` (rayon) and `unsafe_allocate_zero_vec` (zero-initialized allocation). + +## Feature Flags + +| Flag | Default | Description | |------|---------|-------------| | `parallel` | **Yes** | Enable rayon parallelism for eq table construction and polynomial operations | + +## License + +MIT OR Apache-2.0 diff --git a/crates/jolt-poly/REVIEW.md b/crates/jolt-poly/REVIEW.md new file mode 100644 index 000000000..a0c1d48fd --- /dev/null +++ b/crates/jolt-poly/REVIEW.md @@ -0,0 +1,95 @@ +# jolt-poly Review + +**Crate:** jolt-poly (Level 2) +**LOC:** 4,880 +**Baseline:** 0 clippy warnings, 160 tests passing (was broken before fixes) +**Rating:** 8/10 + +## Overview + +Polynomial types and evaluation primitives for Jolt. Provides `Polynomial` +(evaluation tables over Boolean hypercube), `EqPolynomial`, `UnivariatePoly`, +`CompressedUnivariatePoly`, and traits `MultilinearPoly` / `MultilinearEvaluation`. +Also provides `RlcSource` for streaming PCS access and `LagrangeBasis` for +interpolation. Core data structure for the entire proving pipeline — used by +7+ downstream crates. + +**Verdict:** Solid polynomial library with excellent generic scalar support +(`Polynomial` where T ranges from bool to i128 to field elements). The +evaluation-table-as-struct pattern is clean and well-tested. The major naming +bug (`coefficients` meaning evaluations) has been fixed. 
bincode v2 migration +and `is_multiple_of` ambiguity resolved all compilation issues. 160 tests +now pass including a new LowToHigh binding correctness test. + +--- + +## Findings + +### [CQ-1.1] Field named `coefficients` stores evaluations, not coefficients +**File:** `src/cpu_polynomial.rs` +**Severity:** HIGH +**Finding:** `Polynomial::coefficients` stores evaluation-table entries on the Boolean +hypercube, not polynomial coefficients. Accessor `coefficients()` propagated this +misnaming to 12 downstream call sites across 6 crates. +**Status:** RESOLVED — Renamed field to `evals`, accessor to `evals()`. Updated all 12 +downstream call sites in jolt-sumcheck, jolt-blindfold, jolt-spartan, jolt-zkvm. + +### [CQ-2.1] bincode v1 API usage causes compilation failure +**File:** `src/univariate.rs`, `src/compressed_univariate.rs`, `src/cpu_polynomial.rs`, `tests/integration.rs` +**Severity:** HIGH +**Finding:** Workspace uses bincode v2 but source files use v1 API. Tests fail to compile. +**Status:** RESOLVED — Migrated all 4 files to bincode v2 `encode_to_vec`/`decode_from_slice`. + +### [CQ-2.2] `is_multiple_of` compilation error +**File:** `src/one_hot.rs:193` +**Severity:** HIGH +**Finding:** `i.is_multiple_of(3)` requires `num-integer::Integer` trait which isn't imported. +**Status:** RESOLVED — Changed to `i % 3 == 0`. + +### [CQ-3.1] Missing LowToHigh binding test +**File:** `src/cpu_polynomial.rs` +**Severity:** LOW +**Finding:** The `bind` method has a `reverse: bool` parameter but no test for LowToHigh mode. +**Status:** RESOLVED — Added `low_to_high_binding_produces_correct_evaluation` test verifying +both binding orders produce identical evaluations. + +### [CQ-4.1] Misleading lagrange.rs comment about batch inversion +**File:** `src/lagrange.rs` +**Severity:** LOW +**Finding:** Doc comment claims "a single batch inversion" but implementation uses per-element `inverse()`. 
+**Status:** RESOLVED — Updated to "$O(N)$ per-element inversions" and "$O(N^2)$ weight computation". + +### [CD-1.1] EqPlusOnePolynomial::point field is public +**File:** `src/eq_plus_one.rs` +**Severity:** LOW +**Finding:** Internal field `point` exposed as `pub` with no need for external access. +**Status:** RESOLVED — Made private. + +### [CD-2.1] RlcSource::for_each_row duplication +**File:** `src/rlc.rs` +**Severity:** LOW +**Finding:** Potential duplication between `for_each_row` and `fold_rows` on streaming types. +**Status:** PASS — Investigated; these are distinct methods with different semantics (callback vs fold). + +### [CD-3.1] MultilinearPoly vs MultilinearEvaluation trait split undocumented +**File:** `src/cpu_polynomial.rs` +**Severity:** LOW +**Finding:** The distinction between these two traits is not obvious from names alone. +**Status:** DEFERRED — Documentation improvement, not blocking. + +### [CD-4.1] Cargo.toml metadata incomplete +**File:** `Cargo.toml` +**Severity:** LOW +**Finding:** Missing authors, repository, keywords, categories; license was MIT-only. +**Status:** RESOLVED — Updated to dual MIT OR Apache-2.0, added all metadata. 
+ +--- + +## Summary + +| Category | Pass | Resolved | Deferred | Total | +|----------|------|----------|----------|-------| +| CQ | 0 | 5 | 0 | 5 | +| CD | 1 | 2 | 1 | 4 | +| NIT | 0 | 0 | 0 | 0 | +| **Total**| **1**| **7** | **1** | **9** | diff --git a/crates/jolt-poly/benches/poly_ops.rs b/crates/jolt-poly/benches/poly_ops.rs new file mode 100644 index 000000000..2ef6538a5 --- /dev/null +++ b/crates/jolt-poly/benches/poly_ops.rs @@ -0,0 +1,64 @@ +#![allow(unused_results)] + +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; +use jolt_field::{Field, Fr}; +use jolt_poly::{EqPolynomial, Polynomial}; +use rand_chacha::ChaCha20Rng; +use rand_core::SeedableRng; + +fn bench_bind(c: &mut Criterion) { + let mut group = c.benchmark_group("Polynomial::bind"); + for num_vars in [14, 18, 20] { + let mut rng = ChaCha20Rng::seed_from_u64(num_vars as u64); + let poly = Polynomial::::random(num_vars, &mut rng); + let scalar = Fr::random(&mut rng); + + group.bench_with_input( + BenchmarkId::from_parameter(num_vars), + &num_vars, + |bench, _| { + bench.iter_batched( + || poly.clone(), + |mut p| { + p.bind(black_box(scalar)); + p + }, + criterion::BatchSize::LargeInput, + ); + }, + ); + } + group.finish(); +} + +fn bench_eq_evaluations(c: &mut Criterion) { + let mut group = c.benchmark_group("EqPolynomial::evaluations"); + for num_vars in [14, 18, 20] { + let mut rng = ChaCha20Rng::seed_from_u64(100 + num_vars as u64); + let point: Vec = (0..num_vars).map(|_| Fr::random(&mut rng)).collect(); + let eq = EqPolynomial::new(point); + + group.bench_with_input( + BenchmarkId::from_parameter(num_vars), + &num_vars, + |bench, _| { + bench.iter(|| black_box(&eq).evaluations()); + }, + ); + } + group.finish(); +} + +fn bench_evaluate(c: &mut Criterion) { + let num_vars = 20; + let mut rng = ChaCha20Rng::seed_from_u64(200); + let poly = Polynomial::::random(num_vars, &mut rng); + let point: Vec = (0..num_vars).map(|_| Fr::random(&mut rng)).collect(); + + 
c.bench_function("Polynomial::evaluate/20", |bench| { + bench.iter(|| black_box(&poly).evaluate(black_box(&point))); + }); +} + +criterion_group!(benches, bench_bind, bench_eq_evaluations, bench_evaluate); +criterion_main!(benches); diff --git a/crates/jolt-poly/fuzz/Cargo.toml b/crates/jolt-poly/fuzz/Cargo.toml new file mode 100644 index 000000000..7fce4b7b7 --- /dev/null +++ b/crates/jolt-poly/fuzz/Cargo.toml @@ -0,0 +1,20 @@ +[workspace] + +[package] +name = "jolt-poly-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +jolt-poly = { path = ".." } +jolt-field = { path = "../../jolt-field" } + +[[bin]] +name = "dense_poly_ops" +path = "fuzz_targets/dense_poly_ops.rs" +doc = false diff --git a/crates/jolt-poly/fuzz/fuzz_targets/dense_poly_ops.rs b/crates/jolt-poly/fuzz/fuzz_targets/dense_poly_ops.rs new file mode 100644 index 000000000..662c72c76 --- /dev/null +++ b/crates/jolt-poly/fuzz/fuzz_targets/dense_poly_ops.rs @@ -0,0 +1,37 @@ +#![no_main] +use jolt_field::{Field, Fr}; +use jolt_poly::Polynomial; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + // Need at least 32 bytes for a single field element to build a polynomial, + // plus 32 bytes per point coordinate. Keep num_vars small to avoid OOM. 
+ if data.len() < 64 { + return; + } + + // Derive num_vars from first byte; `% 8` then `.max(1)` keeps it in 1..=7 + // (at most 128 evaluations), small enough to avoid OOM + let num_vars = (data[0] as usize % 8).max(1); + let n = 1usize << num_vars; + let needed = n * 32 + num_vars * 32; + if data.len() < needed { + return; + } + + // Build evaluation vector from fuzzer data + let evals: Vec = (0..n) + .map(|i| ::from_bytes(&data[i * 32..(i + 1) * 32])) + .collect(); + let poly = Polynomial::new(evals); + + // Build evaluation point from fuzzer data + let point_start = n * 32; + let point: Vec = (0..num_vars) + .map(|i| ::from_bytes(&data[point_start + i * 32..point_start + (i + 1) * 32])) + .collect(); + + // evaluate and evaluate_and_consume must agree and not panic + let eval = poly.evaluate(&point); + let eval_consumed = poly.clone().evaluate_and_consume(&point); + assert_eq!(eval, eval_consumed, "evaluate and evaluate_and_consume disagree"); +}); diff --git a/crates/jolt-poly/fuzz/rust-toolchain.toml b/crates/jolt-poly/fuzz/rust-toolchain.toml new file mode 100644 index 000000000..5d56faf9a --- /dev/null +++ b/crates/jolt-poly/fuzz/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly" diff --git a/crates/jolt-poly/src/binding.rs b/crates/jolt-poly/src/binding.rs new file mode 100644 index 000000000..5060e9ca5 --- /dev/null +++ b/crates/jolt-poly/src/binding.rs @@ -0,0 +1,16 @@ +//! Variable binding order for sumcheck protocols. + +use serde::{Deserialize, Serialize}; + +/// The order in which polynomial variables are bound during sumcheck. +/// +/// - **LowToHigh**: Bind from the least-significant bit (index `n-1` in the +/// evaluation table) upward. This is the default for most sumcheck instances. +/// - **HighToLow**: Bind from the most-significant bit (index `0`) downward. +/// Used by Spartan's outer sumcheck. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Serialize, Deserialize)] +pub enum BindingOrder { + #[default] + LowToHigh, + HighToLow, +} diff --git a/crates/jolt-poly/src/compressed_univariate.rs b/crates/jolt-poly/src/compressed_univariate.rs new file mode 100644 index 000000000..9527f565c --- /dev/null +++ b/crates/jolt-poly/src/compressed_univariate.rs @@ -0,0 +1,209 @@ +//! Compressed univariate polynomial with the linear term omitted. +//! +//! Used in sumcheck proofs to save one field element per round polynomial. +//! The linear term is recoverable from the sumcheck claim `f(0) + f(1)`. + +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::univariate::{UnivariatePoly, UnivariatePolynomial}; + +/// Compressed univariate polynomial: stores `[c0, c2, c3, ...]` with the +/// linear coefficient `c1` omitted. +/// +/// Given the hint `h = f(0) + f(1)`, the linear term is recovered as: +/// `c1 = h - 2*c0 - c2 - c3 - ...` +/// +/// This saves one field element per sumcheck round polynomial in proof +/// serialization (32 bytes for BN254). +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(bound = "")] +pub struct CompressedPoly { + coeffs_except_linear_term: Vec, +} + +impl UnivariatePolynomial for CompressedPoly { + /// Degree of the polynomial. + /// + /// A degree-d polynomial has d+1 coefficients; the compressed form stores + /// d of them (all except the linear term), so `stored_len == degree`. + fn degree(&self) -> usize { + self.coeffs_except_linear_term.len() + } +} + +impl CompressedPoly { + /// Creates a compressed polynomial from the stored coefficients `[c0, c2, c3, ...]`. + pub fn new(coeffs_except_linear_term: Vec) -> Self { + Self { + coeffs_except_linear_term, + } + } + + /// The stored coefficients `[c0, c2, c3, ...]` (linear term omitted). 
+ pub fn coeffs_except_linear_term(&self) -> &[F] { + &self.coeffs_except_linear_term + } + + pub fn is_empty(&self) -> bool { + self.coeffs_except_linear_term.is_empty() + } + + /// Recovers the omitted linear term from the hint `h = f(0) + f(1)`. + /// + /// `c1 = h - 2*c0 - c2 - c3 - ...` + #[inline] + fn recover_linear_term(&self, hint: F) -> F { + let c0 = self.coeffs_except_linear_term[0]; + let mut linear_term = hint - c0 - c0; + for &c in &self.coeffs_except_linear_term[1..] { + linear_term -= c; + } + linear_term + } + + /// Evaluates the polynomial at `point` using the hint `h = f(0) + f(1)`. + /// + /// Recovers the linear term, then evaluates via ascending-power accumulation + /// in O(d) multiplications. + #[inline] + pub fn evaluate_with_hint(&self, hint: F, point: F) -> F { + let linear_term = self.recover_linear_term(hint); + + let mut x_pow = point; + let mut sum = self.coeffs_except_linear_term[0] + point * linear_term; + for &c in &self.coeffs_except_linear_term[1..] { + x_pow *= point; + sum += c * x_pow; + } + sum + } + + /// Recovers the full polynomial given the hint `h = f(0) + f(1)`. + pub fn decompress(&self, hint: F) -> UnivariatePoly { + let linear_term = self.recover_linear_term(hint); + + let mut coeffs = Vec::with_capacity(self.coeffs_except_linear_term.len() + 1); + coeffs.push(self.coeffs_except_linear_term[0]); + coeffs.push(linear_term); + coeffs.extend_from_slice(&self.coeffs_except_linear_term[1..]); + UnivariatePoly::new(coeffs) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use jolt_field::Fr; + use num_traits::{One, Zero}; + + /// Helper: build a standard polynomial p(x) = c0 + c1*x + c2*x^2 + ... + /// and compute the sumcheck hint h = p(0) + p(1). 
+ fn poly_and_hint(coeffs: Vec) -> (UnivariatePoly, Fr) { + let p = UnivariatePoly::new(coeffs); + let hint = p.evaluate(Fr::zero()) + p.evaluate(Fr::one()); + (p, hint) + } + + #[test] + fn compress_decompress_round_trip() { + // p(x) = 1 + 3x + 2x^2 + let (p, hint) = poly_and_hint(vec![Fr::from_u64(1), Fr::from_u64(3), Fr::from_u64(2)]); + let compressed = p.compress(); + let recovered = compressed.decompress(hint); + assert_eq!(recovered, p); + } + + #[test] + fn evaluate_with_hint_matches_standard() { + // p(x) = 1 + 3x + 2x^2 + let (p, hint) = poly_and_hint(vec![Fr::from_u64(1), Fr::from_u64(3), Fr::from_u64(2)]); + let compressed = p.compress(); + + for i in 0..10 { + let x = Fr::from_u64(i); + assert_eq!( + compressed.evaluate_with_hint(hint, x), + p.evaluate(x), + "mismatch at x={i}" + ); + } + } + + #[test] + fn compress_linear_polynomial() { + // p(x) = 5 + 7x (degree 1) + let (p, hint) = poly_and_hint(vec![Fr::from_u64(5), Fr::from_u64(7)]); + let compressed = p.compress(); + + assert_eq!(compressed.degree(), 1); + // Stored coefficients: [c0] = [5] + assert_eq!(compressed.coeffs_except_linear_term().len(), 1); + + let recovered = compressed.decompress(hint); + assert_eq!(recovered, p); + + let x = Fr::from_u64(3); + assert_eq!(compressed.evaluate_with_hint(hint, x), p.evaluate(x)); + } + + #[test] + fn compress_cubic_polynomial() { + // p(x) = 1 + 3x + 2x^2 + x^3 (typical sumcheck degree) + let (p, hint) = poly_and_hint(vec![ + Fr::from_u64(1), + Fr::from_u64(3), + Fr::from_u64(2), + Fr::from_u64(1), + ]); + let compressed = p.compress(); + + assert_eq!(compressed.degree(), 3); + // Stored: [c0, c2, c3] = [1, 2, 1] + assert_eq!(compressed.coeffs_except_linear_term().len(), 3); + + let recovered = compressed.decompress(hint); + assert_eq!(recovered, p); + + for i in 0..10 { + let x = Fr::from_u64(i); + assert_eq!( + compressed.evaluate_with_hint(hint, x), + p.evaluate(x), + "mismatch at x={i}" + ); + } + } + + #[test] + fn serde_round_trip() { + let (p, 
_) = poly_and_hint(vec![ + Fr::from_u64(1), + Fr::from_u64(3), + Fr::from_u64(2), + Fr::from_u64(1), + ]); + let compressed = p.compress(); + let bytes = + bincode::serde::encode_to_vec(&compressed, bincode::config::standard()).unwrap(); + let recovered: CompressedPoly = + bincode::serde::decode_from_slice(&bytes, bincode::config::standard()) + .unwrap() + .0; + assert_eq!(compressed, recovered); + } + + #[test] + fn degree_matches_standard() { + for deg in 1..=5 { + let coeffs: Vec = (0..=deg).map(|i| Fr::from_u64(i as u64 + 1)).collect(); + let p = UnivariatePoly::new(coeffs); + let compressed = p.compress(); + assert_eq!( + compressed.degree(), + p.degree(), + "degree mismatch for deg={deg}" + ); + } + } +} diff --git a/crates/jolt-poly/src/cpu_polynomial.rs b/crates/jolt-poly/src/cpu_polynomial.rs new file mode 100644 index 000000000..4aabedff9 --- /dev/null +++ b/crates/jolt-poly/src/cpu_polynomial.rs @@ -0,0 +1,1010 @@ +//! Polynomial stored as evaluations over the Boolean hypercube. + +use std::ops::{Add, AddAssign, Mul, Neg, Sub, SubAssign}; + +use jolt_field::Field; +use rand_core::RngCore; +use serde::{Deserialize, Serialize}; + +use crate::eq::EqPolynomial; + +/// Minimum number of evaluations before parallelizing bind/evaluate. +/// +/// Below this threshold the overhead of Rayon work-stealing exceeds the +/// benefit. 1024 field elements is roughly one L1 cache line's worth of +/// useful work per core, keeping synchronization cost negligible. +const PAR_THRESHOLD: usize = 1024; + +/// Multilinear polynomial stored as evaluations over the Boolean hypercube $\{0,1\}^n$. +/// +/// Generic over the scalar type `T`: +/// - When `T` is a [`Field`] type: full polynomial with in-place [`bind`](Polynomial::bind), +/// [`evaluate`](Polynomial::evaluate), and arithmetic operators. +/// - When `T` is a small type (`u8`, `bool`, `i64`, etc.): compact storage with +/// [`bind_to_field`](Polynomial::bind_to_field) for on-demand field promotion. 
+#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(bound(serialize = "T: Serialize", deserialize = "T: for<'a> Deserialize<'a>",))] +pub struct Polynomial { + evals: Vec, + num_vars: usize, +} + +impl Polynomial { + /// Creates a polynomial from its evaluations over the Boolean hypercube. + /// + /// # Panics + /// Panics if `evals.len()` is not a power of two (or zero). + pub fn new(evals: Vec) -> Self { + let len = evals.len(); + if len == 0 { + return Self { evals, num_vars: 0 }; + } + assert!( + len.is_power_of_two(), + "evaluation count must be a power of two, got {len}" + ); + let num_vars = len.trailing_zeros() as usize; + Self { evals, num_vars } + } + + /// Number of variables `n`. The polynomial has `2^n` evaluations. + #[inline] + pub fn num_vars(&self) -> usize { + self.num_vars + } + + #[inline] + pub fn len(&self) -> usize { + self.evals.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.evals.is_empty() + } + + /// The raw evaluation table over the Boolean hypercube. + #[inline] + pub fn evals(&self) -> &[T] { + &self.evals + } +} + +impl Polynomial { + /// Fixes the first variable to `scalar`, promoting all evaluations to field elements. + /// + /// Produces a `Polynomial` with `n - 1` variables: + /// $$g(x_2, \ldots, x_n) = (1 - s) \cdot f(0, x_2, \ldots) + s \cdot f(1, x_2, \ldots)$$ + /// + /// When `T = F`, the `From` conversion is the identity and the compiler + /// eliminates it, making this equivalent to an allocating bind. + pub fn bind_to_field>(&self, scalar: F) -> Polynomial { + let half = self.evals.len() / 2; + let mut result = Vec::with_capacity(half); + for i in 0..half { + let lo: F = self.evals[i].into(); + let hi: F = self.evals[i + half].into(); + result.push(lo + scalar * (hi - lo)); + } + Polynomial { + evals: result, + num_vars: self.num_vars - 1, + } + } +} + +impl Polynomial { + /// Creates the zero polynomial with $2^n$ evaluations all equal to zero. 
+ pub fn zeros(num_vars: usize) -> Self { + Self { + evals: vec![F::zero(); 1 << num_vars], + num_vars, + } + } + + /// Creates a polynomial with random evaluations. + pub fn random(num_vars: usize, rng: &mut impl RngCore) -> Self { + let evals = (0..(1 << num_vars)).map(|_| F::random(rng)).collect(); + Self { evals, num_vars } + } + + /// Fixes the first (MSB) variable to `scalar` in place, halving the evaluations. + /// + /// The evaluations table is laid out so that the first variable controls the + /// upper/lower half split: indices `0..half` have $x_1 = 0$ and indices + /// `half..2*half` have $x_1 = 1$. The result is: + /// $$g(x_2, \ldots, x_n) = f(0, x_2, \ldots) + s \cdot (f(1, x_2, \ldots) - f(0, x_2, \ldots))$$ + /// + /// Equivalent to `bind_with_order(scalar, BindingOrder::HighToLow)`. + #[inline] + pub fn bind(&mut self, scalar: F) { + self.bind_high_to_low(scalar); + } + + /// Binds with the specified variable ordering. + /// + /// - `BindingOrder::HighToLow`: binds the MSB (first variable, index `0`). + /// Pairs `evals[i]` with `evals[i + half]`. + /// - `BindingOrder::LowToHigh`: binds the LSB (last variable, index `n-1`). + /// Pairs `evals[2*i]` with `evals[2*i + 1]`. 
+ #[inline] + pub fn bind_with_order(&mut self, scalar: F, order: crate::BindingOrder) { + match order { + crate::BindingOrder::HighToLow => self.bind_high_to_low(scalar), + crate::BindingOrder::LowToHigh => self.bind_low_to_high(scalar), + } + } + + #[inline] + fn bind_high_to_low(&mut self, scalar: F) { + let half = self.evals.len() / 2; + + #[cfg(feature = "parallel")] + { + if half >= PAR_THRESHOLD { + use rayon::prelude::*; + let (lo, hi) = self.evals.split_at_mut(half); + lo.par_iter_mut().zip(hi.par_iter()).for_each(|(a, b)| { + *a = *a + scalar * (*b - *a); + }); + } else { + for i in 0..half { + let lo = self.evals[i]; + let hi = self.evals[i + half]; + self.evals[i] = lo + scalar * (hi - lo); + } + } + } + + #[cfg(not(feature = "parallel"))] + { + for i in 0..half { + let lo = self.evals[i]; + let hi = self.evals[i + half]; + self.evals[i] = lo + scalar * (hi - lo); + } + } + + self.evals.truncate(half); + self.num_vars -= 1; + } + + #[inline] + fn bind_low_to_high(&mut self, scalar: F) { + let half = self.evals.len() / 2; + + #[cfg(feature = "parallel")] + { + if half >= PAR_THRESHOLD { + use rayon::prelude::*; + // Parallel: write into a new buffer to avoid aliasing + let coeffs = &self.evals; + let new: Vec = (0..half) + .into_par_iter() + .map(|i| { + let lo = coeffs[2 * i]; + let hi = coeffs[2 * i + 1]; + lo + scalar * (hi - lo) + }) + .collect(); + self.evals = new; + } else { + for i in 0..half { + let lo = self.evals[2 * i]; + let hi = self.evals[2 * i + 1]; + self.evals[i] = lo + scalar * (hi - lo); + } + self.evals.truncate(half); + } + } + + #[cfg(not(feature = "parallel"))] + { + for i in 0..half { + let lo = self.evals[2 * i]; + let hi = self.evals[2 * i + 1]; + self.evals[i] = lo + scalar * (hi - lo); + } + self.evals.truncate(half); + } + + self.num_vars -= 1; + } + + /// Returns the `(lo, hi)` pair for the given index and binding order. 
+ /// + /// For sumcheck round polynomial evaluation at index `j`: + /// - `HighToLow`: `lo = evals[j]`, `hi = evals[j + half]` + /// - `LowToHigh`: `lo = evals[2*j]`, `hi = evals[2*j + 1]` + #[inline] + pub fn sumcheck_eval_pair(&self, index: usize, order: crate::BindingOrder) -> (F, F) { + match order { + crate::BindingOrder::HighToLow => { + let half = self.evals.len() / 2; + (self.evals[index], self.evals[index + half]) + } + crate::BindingOrder::LowToHigh => (self.evals[2 * index], self.evals[2 * index + 1]), + } + } + + /// Evaluates the polynomial at `point` using the multilinear extension formula: + /// $$f(r) = \sum_{x \in \{0,1\}^n} f(x) \cdot \widetilde{eq}(x, r)$$ + pub fn evaluate(&self, point: &[F]) -> F { + assert_eq!( + point.len(), + self.num_vars, + "point dimension must match num_vars" + ); + let eq_evals = EqPolynomial::new(point.to_vec()).evaluations(); + + #[cfg(feature = "parallel")] + { + if self.evals.len() >= PAR_THRESHOLD { + use rayon::prelude::*; + return self + .evals + .par_iter() + .zip(eq_evals.par_iter()) + .map(|(&f, &e)| f * e) + .sum(); + } + } + + self.evals + .iter() + .zip(eq_evals.iter()) + .map(|(&f, &e)| f * e) + .sum() + } + + /// Evaluates by sequentially binding each variable, consuming `self`. + /// + /// More memory-efficient than `evaluate` when the polynomial is no longer needed, + /// as it avoids materializing the full eq table. 
+ pub fn evaluate_and_consume(mut self, point: &[F]) -> F { + assert_eq!( + point.len(), + self.num_vars, + "point dimension must match num_vars" + ); + for &r in point { + self.bind(r); + } + debug_assert_eq!(self.evals.len(), 1); + self.evals[0] + } + + #[inline] + pub fn evaluations(&self) -> &[F] { + &self.evals + } + + #[inline] + pub fn evaluations_mut(&mut self) -> &mut [F] { + &mut self.evals + } +} + +impl From> for Polynomial { + fn from(evaluations: Vec) -> Self { + Self::new(evaluations) + } +} + +impl crate::MultilinearEvaluation for Polynomial { + #[inline] + fn num_vars(&self) -> usize { + self.num_vars + } + + #[inline] + fn len(&self) -> usize { + self.evals.len() + } + + fn evaluate(&self, point: &[F]) -> F { + Polynomial::evaluate(self, point) + } +} + +impl crate::MultilinearBinding for Polynomial { + fn bind(&mut self, scalar: F) { + Polynomial::bind(self, scalar); + } +} + +#[inline] +fn assert_matching_dims(a: &Polynomial, b: &Polynomial) -> (usize, usize) { + assert_eq!( + a.num_vars, b.num_vars, + "num_vars mismatch: {} vs {}", + a.num_vars, b.num_vars + ); + (a.num_vars, a.evals.len()) +} + +impl Add for Polynomial { + type Output = Self; + + fn add(mut self, rhs: Self) -> Self { + self += &rhs; + self + } +} + +impl Add<&Self> for Polynomial { + type Output = Self; + + fn add(mut self, rhs: &Self) -> Self { + self += rhs; + self + } +} + +impl AddAssign for Polynomial { + fn add_assign(&mut self, rhs: Self) { + *self += &rhs; + } +} + +impl AddAssign<&Self> for Polynomial { + fn add_assign(&mut self, rhs: &Self) { + let (_nv, len) = assert_matching_dims(self, rhs); + + #[cfg(feature = "parallel")] + { + if len >= PAR_THRESHOLD { + use rayon::prelude::*; + self.evals + .par_iter_mut() + .zip(rhs.evals.par_iter()) + .for_each(|(a, b)| *a += *b); + return; + } + } + + for i in 0..len { + self.evals[i] += rhs.evals[i]; + } + } +} + +impl Sub for Polynomial { + type Output = Self; + + fn sub(mut self, rhs: Self) -> Self { + self -= &rhs; + 
self + } +} + +impl Sub<&Self> for Polynomial { + type Output = Self; + + fn sub(mut self, rhs: &Self) -> Self { + self -= rhs; + self + } +} + +impl SubAssign for Polynomial { + fn sub_assign(&mut self, rhs: Self) { + *self -= &rhs; + } +} + +impl SubAssign<&Self> for Polynomial { + fn sub_assign(&mut self, rhs: &Self) { + let (_nv, len) = assert_matching_dims(self, rhs); + + #[cfg(feature = "parallel")] + { + if len >= PAR_THRESHOLD { + use rayon::prelude::*; + self.evals + .par_iter_mut() + .zip(rhs.evals.par_iter()) + .for_each(|(a, b)| *a -= *b); + return; + } + } + + for i in 0..len { + self.evals[i] -= rhs.evals[i]; + } + } +} + +impl Mul for Polynomial { + type Output = Self; + + fn mul(mut self, rhs: F) -> Self { + let len = self.evals.len(); + + #[cfg(feature = "parallel")] + { + if len >= PAR_THRESHOLD { + use rayon::prelude::*; + self.evals.par_iter_mut().for_each(|a| *a *= rhs); + return self; + } + } + + for i in 0..len { + self.evals[i] *= rhs; + } + self + } +} + +impl Mul for &Polynomial { + type Output = Polynomial; + + fn mul(self, rhs: F) -> Polynomial { + self.clone() * rhs + } +} + +impl Neg for Polynomial { + type Output = Self; + + fn neg(mut self) -> Self { + let len = self.evals.len(); + + #[cfg(feature = "parallel")] + { + if len >= PAR_THRESHOLD { + use rayon::prelude::*; + self.evals.par_iter_mut().for_each(|a| *a = -*a); + return self; + } + } + + for i in 0..len { + self.evals[i] = -self.evals[i]; + } + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + use jolt_field::Field; + use jolt_field::Fr; + use num_traits::{One, Zero}; + use rand_chacha::ChaCha20Rng; + use rand_core::SeedableRng; + + #[test] + fn bind_to_field_then_evaluate_equals_direct_evaluate() { + let mut rng = ChaCha20Rng::seed_from_u64(1); + let n = 5; + let poly = Polynomial::::random(n, &mut rng); + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + + let direct = poly.evaluate(&point); + + let bound = poly.bind_to_field(point[0]); + let 
via_bind = bound.evaluate(&point[1..]); + + assert_eq!(direct, via_bind); + } + + #[test] + fn zeros_evaluates_to_zero() { + let mut rng = ChaCha20Rng::seed_from_u64(2); + let n = 4; + let poly = Polynomial::::zeros(n); + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + assert!(poly.evaluate(&point).is_zero()); + } + + #[test] + fn bind_matches_bind_to_field() { + let mut rng = ChaCha20Rng::seed_from_u64(3); + let n = 6; + let poly = Polynomial::::random(n, &mut rng); + let scalar = Fr::random(&mut rng); + + let bound = poly.bind_to_field(scalar); + + let mut poly_mut = poly; + poly_mut.bind(scalar); + + assert_eq!(bound.evaluations(), poly_mut.evaluations()); + } + + #[test] + fn evaluate_and_consume_matches_evaluate() { + let mut rng = ChaCha20Rng::seed_from_u64(4); + let n = 4; + let poly = Polynomial::::random(n, &mut rng); + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + + let expected = poly.evaluate(&point); + let consumed = poly.clone().evaluate_and_consume(&point); + assert_eq!(expected, consumed); + } + + #[test] + fn empty_polynomial() { + let poly = Polynomial::::new(vec![]); + assert_eq!(poly.num_vars(), 0); + assert!(poly.is_empty()); + } + + #[test] + fn single_evaluation() { + let val = Fr::from_u64(42); + let poly = Polynomial::new(vec![val]); + assert_eq!(poly.num_vars(), 0); + assert_eq!(poly.evaluate(&[]), val); + } + + #[test] + fn sequential_bind_equals_full_evaluate() { + let mut rng = ChaCha20Rng::seed_from_u64(5); + let n = 4; + let poly = Polynomial::::random(n, &mut rng); + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + + let mut p = poly.clone(); + for &r in &point { + p.bind(r); + } + assert_eq!(p.evals.len(), 1); + assert_eq!(p.evals[0], poly.evaluate(&point)); + } + + #[allow(unused)] + fn uses_one_trait() { + let _ = Fr::one(); + } + + #[test] + fn serde_round_trip() { + let mut rng = ChaCha20Rng::seed_from_u64(100); + let poly = Polynomial::::random(4, &mut rng); + let bytes 
= bincode::serde::encode_to_vec(&poly, bincode::config::standard()).unwrap(); + let recovered: Polynomial = + bincode::serde::decode_from_slice(&bytes, bincode::config::standard()) + .unwrap() + .0; + assert_eq!(poly, recovered); + } + + #[test] + fn serde_round_trip_empty() { + let poly = Polynomial::::new(vec![]); + let bytes = bincode::serde::encode_to_vec(&poly, bincode::config::standard()).unwrap(); + let recovered: Polynomial = + bincode::serde::decode_from_slice(&bytes, bincode::config::standard()) + .unwrap() + .0; + assert_eq!(poly, recovered); + } + + #[test] + fn serde_round_trip_single() { + let poly = Polynomial::new(vec![Fr::from_u64(99)]); + let bytes = bincode::serde::encode_to_vec(&poly, bincode::config::standard()).unwrap(); + let recovered: Polynomial = + bincode::serde::decode_from_slice(&bytes, bincode::config::standard()) + .unwrap() + .0; + assert_eq!(poly, recovered); + } + + #[test] + fn parallel_bind_matches_bind_to_field() { + // n=11 -> 2048 evaluations, above PAR_THRESHOLD=1024 + let mut rng = ChaCha20Rng::seed_from_u64(201); + let n = 11; + let poly = Polynomial::::random(n, &mut rng); + let scalar = Fr::random(&mut rng); + + let bound = poly.bind_to_field(scalar); + + let mut poly_mut = poly; + poly_mut.bind(scalar); + + assert_eq!(bound.evaluations(), poly_mut.evaluations()); + } + + #[test] + fn parallel_bind_equals_evaluate_and_consume() { + let mut rng = ChaCha20Rng::seed_from_u64(202); + let n = 11; + let poly = Polynomial::::random(n, &mut rng); + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + + let consumed = poly.clone().evaluate_and_consume(&point); + + let mut p = poly; + for &r in &point { + p.bind(r); + } + assert_eq!(p.evals.len(), 1); + assert_eq!(p.evals[0], consumed); + } + + #[test] + fn parallel_bind_then_evaluate_and_consume() { + let mut rng = ChaCha20Rng::seed_from_u64(203); + let n = 11; + let poly = Polynomial::::random(n, &mut rng); + let point: Vec = (0..n).map(|_| Fr::random(&mut 
rng)).collect(); + + let expected = poly.clone().evaluate_and_consume(&point); + + let bound = poly.bind_to_field(point[0]); + let via_bind = bound.evaluate_and_consume(&point[1..]); + assert_eq!(expected, via_bind); + } + + #[test] + fn add_element_wise() { + let mut rng = ChaCha20Rng::seed_from_u64(500); + let n = 4; + let a = Polynomial::::random(n, &mut rng); + let b = Polynomial::::random(n, &mut rng); + + let sum = a.clone() + &b; + for i in 0..sum.evaluations().len() { + assert_eq!( + sum.evaluations()[i], + a.evaluations()[i] + b.evaluations()[i] + ); + } + } + + #[test] + fn sub_element_wise() { + let mut rng = ChaCha20Rng::seed_from_u64(501); + let n = 4; + let a = Polynomial::::random(n, &mut rng); + let b = Polynomial::::random(n, &mut rng); + + let diff = a.clone() - &b; + for i in 0..diff.evaluations().len() { + assert_eq!( + diff.evaluations()[i], + a.evaluations()[i] - b.evaluations()[i] + ); + } + } + + #[test] + fn scalar_mul() { + let mut rng = ChaCha20Rng::seed_from_u64(502); + let n = 4; + let poly = Polynomial::::random(n, &mut rng); + let s = Fr::random(&mut rng); + + let scaled = poly.clone() * s; + for i in 0..scaled.evaluations().len() { + assert_eq!(scaled.evaluations()[i], poly.evaluations()[i] * s); + } + } + + #[test] + fn negation() { + let mut rng = ChaCha20Rng::seed_from_u64(503); + let n = 4; + let poly = Polynomial::::random(n, &mut rng); + + let neg = -poly.clone(); + for i in 0..neg.evaluations().len() { + assert_eq!(neg.evaluations()[i], -poly.evaluations()[i]); + } + } + + #[test] + fn add_preserves_evaluation() { + let mut rng = ChaCha20Rng::seed_from_u64(504); + let n = 5; + let a = Polynomial::::random(n, &mut rng); + let b = Polynomial::::random(n, &mut rng); + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + + let sum = a.clone() + &b; + assert_eq!( + sum.evaluate(&point), + a.evaluate(&point) + b.evaluate(&point) + ); + } + + #[test] + fn sub_preserves_evaluation() { + let mut rng = 
ChaCha20Rng::seed_from_u64(505); + let n = 5; + let a = Polynomial::::random(n, &mut rng); + let b = Polynomial::::random(n, &mut rng); + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + + let diff = a.clone() - &b; + assert_eq!( + diff.evaluate(&point), + a.evaluate(&point) - b.evaluate(&point) + ); + } + + #[test] + fn scalar_mul_preserves_evaluation() { + let mut rng = ChaCha20Rng::seed_from_u64(506); + let n = 5; + let poly = Polynomial::::random(n, &mut rng); + let s = Fr::random(&mut rng); + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + + let scaled = poly.clone() * s; + assert_eq!(scaled.evaluate(&point), poly.evaluate(&point) * s); + } + + #[test] + #[should_panic(expected = "num_vars mismatch")] + fn add_mismatched_num_vars_panics() { + let mut rng = ChaCha20Rng::seed_from_u64(510); + let a = Polynomial::::random(3, &mut rng); + let b = Polynomial::::random(4, &mut rng); + let _ = a + b; + } + + #[test] + fn add_assign_accumulation() { + let mut rng = ChaCha20Rng::seed_from_u64(511); + let n = 4; + let a = Polynomial::::random(n, &mut rng); + let b = Polynomial::::random(n, &mut rng); + let c = Polynomial::::random(n, &mut rng); + + let mut acc = a.clone(); + acc += &b; + acc += &c; + + let expected = a.clone() + &b + &c; + assert_eq!(acc, expected); + } + + #[test] + fn neg_double_is_identity() { + let mut rng = ChaCha20Rng::seed_from_u64(512); + let poly = Polynomial::::random(4, &mut rng); + assert_eq!(-(-poly.clone()), poly); + } + + #[test] + fn add_sub_inverse() { + let mut rng = ChaCha20Rng::seed_from_u64(513); + let n = 4; + let a = Polynomial::::random(n, &mut rng); + let b = Polynomial::::random(n, &mut rng); + + let result = (a.clone() + &b) - &b; + assert_eq!(result, a); + } + + #[test] + fn ref_scalar_mul() { + let mut rng = ChaCha20Rng::seed_from_u64(514); + let n = 4; + let poly = Polynomial::::random(n, &mut rng); + let s = Fr::random(&mut rng); + + let owned_result = poly.clone() * s; + let ref_result 
= &poly * s; + assert_eq!(owned_result, ref_result); + } + + #[test] + fn compact_u8_bind_to_field_matches_dense() { + let scalars: Vec = vec![0, 1, 2, 3, 4, 5, 6, 7]; + let compact = Polynomial::new(scalars.clone()); + let dense_evals: Vec = scalars.iter().map(|&s| Fr::from(s)).collect(); + let dense = Polynomial::new(dense_evals); + + let mut rng = ChaCha20Rng::seed_from_u64(10); + let scalar = Fr::random(&mut rng); + + assert_eq!( + compact.bind_to_field::(scalar), + dense.bind_to_field(scalar) + ); + } + + #[test] + fn compact_u8_sequential_bind_matches_evaluate() { + let scalars: Vec = vec![0, 1, 2, 3, 4, 5, 6, 7]; + let compact = Polynomial::new(scalars.clone()); + let dense_evals: Vec = scalars.iter().map(|&s| Fr::from(s)).collect(); + let dense = Polynomial::new(dense_evals); + + let mut rng = ChaCha20Rng::seed_from_u64(10); + let point: Vec = (0..3).map(|_| Fr::random(&mut rng)).collect(); + + let mut bound = compact.bind_to_field::(point[0]); + for &r in &point[1..] { + bound.bind(r); + } + assert_eq!(bound.evals[0], dense.evaluate(&point)); + } + + #[test] + fn compact_bool_bind_to_field_matches_dense() { + let scalars: Vec = vec![true, false, false, true]; + let compact = Polynomial::new(scalars.clone()); + let dense_evals: Vec = scalars.iter().map(|&s| Fr::from(s)).collect(); + let dense = Polynomial::new(dense_evals); + + let mut rng = ChaCha20Rng::seed_from_u64(20); + let scalar = Fr::random(&mut rng); + + assert_eq!( + compact.bind_to_field::(scalar), + dense.bind_to_field(scalar) + ); + } + + #[test] + fn compact_u16_bind_to_field_matches_dense() { + let scalars: Vec = vec![1, 2, 3, 4, 5, 6, 7, 8]; + let compact = Polynomial::new(scalars.clone()); + let dense_evals: Vec = scalars.iter().map(|&s| Fr::from(s)).collect(); + let dense = Polynomial::new(dense_evals); + + let mut rng = ChaCha20Rng::seed_from_u64(30); + let scalar = Fr::random(&mut rng); + + assert_eq!( + compact.bind_to_field::(scalar), + dense.bind_to_field(scalar) + ); + } + + #[test] 
+ fn compact_i64_bind_to_field_matches_dense() { + let scalars: Vec = vec![-1, 0, 1, -100, i64::MIN, i64::MAX, -42, 42]; + let compact = Polynomial::new(scalars.clone()); + let dense_evals: Vec = scalars.iter().map(|&s| Fr::from(s)).collect(); + let dense = Polynomial::new(dense_evals); + + let mut rng = ChaCha20Rng::seed_from_u64(50); + let scalar = Fr::random(&mut rng); + + assert_eq!( + compact.bind_to_field::(scalar), + dense.bind_to_field(scalar) + ); + } + + #[test] + fn compact_i128_bind_to_field_matches_dense() { + let scalars: Vec = vec![-1, 0, 1, -999, i128::MIN, i128::MAX, -7, 7]; + let compact = Polynomial::new(scalars.clone()); + let dense_evals: Vec = scalars.iter().map(|&s| Fr::from(s)).collect(); + let dense = Polynomial::new(dense_evals); + + let mut rng = ChaCha20Rng::seed_from_u64(60); + let scalar = Fr::random(&mut rng); + + assert_eq!( + compact.bind_to_field::(scalar), + dense.bind_to_field(scalar) + ); + } + + #[test] + fn compact_u128_bind_to_field_matches_dense() { + let scalars: Vec = vec![u128::MAX, u128::MAX - 1, 0, 1]; + let compact = Polynomial::new(scalars.clone()); + let dense_evals: Vec = scalars.iter().map(|&s| Fr::from(s)).collect(); + let dense = Polynomial::new(dense_evals); + + let mut rng = ChaCha20Rng::seed_from_u64(70); + let scalar = Fr::random(&mut rng); + + assert_eq!( + compact.bind_to_field::(scalar), + dense.bind_to_field(scalar) + ); + } + + #[test] + fn compact_bind_chain_consistency() { + let scalars: Vec = vec![10, 20, 30, 40, 50, 60, 70, 80]; + let compact = Polynomial::new(scalars.clone()); + let dense_evals: Vec = scalars.iter().map(|&s| Fr::from(s)).collect(); + let dense = Polynomial::new(dense_evals); + + let mut rng = ChaCha20Rng::seed_from_u64(80); + let r1 = Fr::random(&mut rng); + let r2 = Fr::random(&mut rng); + let remaining: Vec = (0..1).map(|_| Fr::random(&mut rng)).collect(); + + // bind_to_field(r1) then bind(r2) should match dense evaluate + let mut bound = compact.bind_to_field::(r1); + 
bound.bind(r2); + let result = bound.evaluate(&remaining); + + let mut full_point = vec![r1, r2]; + full_point.extend_from_slice(&remaining); + assert_eq!(result, dense.evaluate(&full_point)); + } + + #[test] + fn compact_empty() { + let compact = Polynomial::::new(vec![]); + assert_eq!(compact.num_vars(), 0); + assert!(compact.is_empty()); + } + + #[test] + fn compact_single_element() { + let compact = Polynomial::::new(vec![42]); + assert_eq!(compact.num_vars(), 0); + assert_eq!(compact.evals(), &[42u64]); + } + + #[test] + fn serde_round_trip_compact_u8() { + let scalars: Vec = vec![0, 1, 2, 3, 4, 5, 6, 7]; + let compact = Polynomial::new(scalars); + let bytes = bincode::serde::encode_to_vec(&compact, bincode::config::standard()).unwrap(); + let recovered: Polynomial = + bincode::serde::decode_from_slice(&bytes, bincode::config::standard()) + .unwrap() + .0; + + let mut rng = ChaCha20Rng::seed_from_u64(40); + let scalar = Fr::random(&mut rng); + assert_eq!( + compact.bind_to_field::(scalar), + recovered.bind_to_field::(scalar) + ); + } + + #[test] + fn serde_round_trip_compact_bool() { + let scalars: Vec = vec![true, false, true, false]; + let compact = Polynomial::new(scalars); + let bytes = bincode::serde::encode_to_vec(&compact, bincode::config::standard()).unwrap(); + let recovered: Polynomial = + bincode::serde::decode_from_slice(&bytes, bincode::config::standard()) + .unwrap() + .0; + + let mut rng = ChaCha20Rng::seed_from_u64(41); + let scalar = Fr::random(&mut rng); + assert_eq!( + compact.bind_to_field::(scalar), + recovered.bind_to_field::(scalar) + ); + } + + #[test] + fn low_to_high_binding_produces_correct_evaluation() { + let mut rng = ChaCha20Rng::seed_from_u64(900); + let n = 5; + let poly = Polynomial::::random(n, &mut rng); + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + + // HighToLow binds point[0] first (MSB), so binding sequentially + // with point[0], point[1], ... should yield evaluate(point). 
+ let mut hi_to_lo = poly.clone(); + for &r in &point { + hi_to_lo.bind_with_order(r, crate::BindingOrder::HighToLow); + } + assert_eq!(hi_to_lo.len(), 1); + assert_eq!(hi_to_lo.evaluations()[0], poly.evaluate(&point)); + + // LowToHigh binds point[n-1] first (LSB), so to get the same + // evaluation we must reverse the order of challenges. + let mut lo_to_hi = poly.clone(); + for &r in point.iter().rev() { + lo_to_hi.bind_with_order(r, crate::BindingOrder::LowToHigh); + } + assert_eq!(lo_to_hi.len(), 1); + assert_eq!(lo_to_hi.evaluations()[0], poly.evaluate(&point)); + } +} diff --git a/crates/jolt-poly/src/eq.rs b/crates/jolt-poly/src/eq.rs new file mode 100644 index 000000000..6ca5ec8cb --- /dev/null +++ b/crates/jolt-poly/src/eq.rs @@ -0,0 +1,584 @@ +//! Equality polynomial for multilinear evaluation. + +use std::ops::{Mul, SubAssign}; + +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +use crate::math::Math; +use crate::thread::unsafe_allocate_zero_vec; + +/// Equality polynomial $\widetilde{eq}(x, r) = \prod_{i=1}^{n}(r_i x_i + (1-r_i)(1-x_i))$. +/// +/// Given a fixed point $r \in \mathbb{F}^n$, the equality polynomial evaluates to 1 +/// when $x = r$ on the Boolean hypercube and 0 at all other Boolean points. Its +/// multilinear extension interpolates these values over all of $\mathbb{F}^n$. +/// +/// This polynomial is fundamental to sumcheck-based protocols where it selects +/// a single evaluation from a multilinear polynomial: +/// $$f(r) = \sum_{x \in \{0,1\}^n} f(x) \cdot \widetilde{eq}(x, r)$$ +#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(bound = "")] +#[allow(clippy::unsafe_derive_deserialize)] +pub struct EqPolynomial { + point: Vec, +} + +/// Parallelism threshold: tables larger than this are built with rayon. +const PAR_THRESHOLD: usize = 1024; + +impl EqPolynomial { + /// Creates a new equality polynomial for the given point $r \in \mathbb{F}^n$. 
+ pub fn new(point: Vec) -> Self { + Self { point } + } + + /// Number of variables `n` in the fixed point `r`. + pub fn num_vars(&self) -> usize { + self.point.len() + } + + /// Materializes all $2^n$ evaluations of $\widetilde{eq}(\cdot, r)$ over the Boolean hypercube. + /// + /// Uses a bottom-up doubling construction: starting from `[1]`, each coordinate + /// $r_i$ doubles the table by producing entries $e \cdot (1 - r_i)$ and $e \cdot r_i$. + /// + /// Time: $O(2^n)$. Space: $O(2^n)$. + #[tracing::instrument(skip_all, name = "EqPolynomial::evaluations")] + pub fn evaluations(&self) -> Vec { + let n = self.point.len(); + let size = 1usize << n; + let mut table = Vec::with_capacity(size); + table.push(F::one()); + + for &r_i in &self.point { + let one_minus_r_i = F::one() - r_i; + let prev_len = table.len(); + + table.resize(prev_len * 2, F::zero()); + + // Process in reverse to avoid overwriting entries we still need. + // After this loop, table[2*j] = old[j] * (1 - r_i) and + // table[2*j+1] = old[j] * r_i. + #[cfg(feature = "parallel")] + { + if prev_len >= PAR_THRESHOLD { + use rayon::prelude::*; + // Snapshot the previous layer so we can scatter into interleaved positions. + let prev: Vec = table[..prev_len].to_vec(); + prev.par_iter().enumerate().for_each(|(j, &base)| { + let ptr = table.as_ptr().cast_mut(); + // SAFETY: each j maps to disjoint indices 2*j and 2*j+1, + // and 2*j+1 < 2*prev_len = table.len(). + unsafe { + ptr.add(2 * j).write(base * one_minus_r_i); + ptr.add(2 * j + 1).write(base * r_i); + } + }); + } else { + for j in (0..prev_len).rev() { + let base = table[j]; + table[2 * j] = base * one_minus_r_i; + table[2 * j + 1] = base * r_i; + } + } + } + + #[cfg(not(feature = "parallel"))] + { + for j in (0..prev_len).rev() { + let base = table[j]; + table[2 * j] = base * one_minus_r_i; + table[2 * j + 1] = base * r_i; + } + } + } + + table + } + + /// Evaluates $\widetilde{eq}(p, r)$ at a single point without materializing the full table. 
+ /// + /// Computes the product $\prod_{i} (r_i \cdot p_i + (1 - r_i)(1 - p_i))$ directly. + /// + /// Time: $O(n)$. Space: $O(1)$. + #[inline] + pub fn evaluate(&self, point: &[F]) -> F { + assert_eq!( + self.point.len(), + point.len(), + "eq polynomial dimension mismatch" + ); + self.point + .iter() + .zip(point.iter()) + .fold(F::one(), |acc, (&r_i, &p_i)| { + acc * (r_i * p_i + (F::one() - r_i) * (F::one() - p_i)) + }) + } +} + +/// Static (point-free) evaluation methods for eq polynomial tables. +/// +/// These accept challenge or field-element slices and produce materialized +/// tables without constructing an `EqPolynomial` instance. They are used +/// by split-eq evaluators and sumcheck witnesses. +impl EqPolynomial { + /// Computes `eq(x, y) = Π_i (x_i y_i + (1 - x_i)(1 - y_i))` for two slices. + pub fn mle(x: &[C], y: &[C]) -> F + where + C: Copy + Send + Sync + Into, + F: Mul + SubAssign, + { + assert_eq!(x.len(), y.len()); + x.iter() + .zip(y.iter()) + .map(|(x_i, y_i)| { + let x: F = (*x_i).into(); + let y: F = (*y_i).into(); + x * y + (F::one() - x) * (F::one() - y) + }) + .fold(F::one(), |acc, v| acc * v) + } + + /// Computes `eq(r, 0) = Π_i (1 - r_i)`, selecting the all-zeros vertex. + pub fn zero_selector(r: &[C]) -> F + where + C: Copy + Send + Sync + Into, + { + r.iter() + .map(|r_i| F::one() - (*r_i).into()) + .fold(F::one(), |acc, v| acc * v) + } + + /// Computes `{ eq(r, x) : x ∈ {0,1}^n }` with optional scaling. + /// + /// Uses a serial or parallel path based on table size. Big-endian index + /// order: `r[0]` is the MSB. + #[tracing::instrument(skip_all, name = "EqPolynomial::evals")] + pub fn evals(r: &[C], scaling_factor: Option) -> Vec + where + C: Copy + Send + Sync + Into, + F: Mul + SubAssign, + { + if r.len() <= 16 { + Self::evals_serial(r, scaling_factor) + } else { + Self::evals_parallel(r, scaling_factor) + } + } + + /// Serial eq table construction with optional scaling. 
+ #[inline] + pub fn evals_serial(r: &[C], scaling_factor: Option) -> Vec + where + C: Copy + Send + Sync + Into, + F: Mul + SubAssign, + { + let mut evals: Vec = vec![scaling_factor.unwrap_or(F::one()); r.len().pow2()]; + let mut size = 1; + for r_j in r { + size *= 2; + for i in (0..size).rev().step_by(2) { + let scalar = evals[i / 2]; + evals[i] = scalar * *r_j; + evals[i - 1] = scalar - evals[i]; + } + } + evals + } + + /// Prefix-cached eq tables: `result[j]` = eq over the prefix `r[..j]`. + /// + /// Returns `n+1` tables where `result[0] = [scaling_factor]` (eq over 0 vars). + /// Big-endian index order. + #[tracing::instrument(skip_all, name = "EqPolynomial::evals_cached")] + pub fn evals_cached(r: &[C], scaling_factor: Option) -> Vec> + where + C: Copy + Send + Sync + Into, + F: Mul + SubAssign, + { + let mut evals: Vec> = (0..=r.len()) + .map(|i| vec![scaling_factor.unwrap_or(F::one()); 1 << i]) + .collect(); + let mut size = 1; + for j in 0..r.len() { + size *= 2; + for i in (0..size).rev().step_by(2) { + let scalar = evals[j][i / 2]; + evals[j + 1][i] = scalar * r[j]; + evals[j + 1][i - 1] = scalar - evals[j + 1][i]; + } + } + evals + } + + /// Like [`evals_cached`](Self::evals_cached) but for high-to-low (reverse) binding order. + /// + /// Returns `result` where `result[j]` contains evaluations for the suffix `r[(n-j)..]`. + /// `result[0] = [scaling_factor]`. Builds tables in reverse variable order. 
+ pub fn evals_cached_rev(r: &[C], scaling_factor: Option) -> Vec> + where + C: Copy + Send + Sync + Into, + F: Mul, + { + let rev_r: Vec<_> = r.iter().rev().collect(); + let mut evals: Vec> = (0..=r.len()) + .map(|i| vec![scaling_factor.unwrap_or(F::one()); 1 << i]) + .collect(); + let mut size = 1; + for j in 0..r.len() { + for i in 0..size { + let scalar = evals[j][i]; + let multiple = 1 << j; + evals[j + 1][i + multiple] = scalar * *rev_r[j]; + evals[j + 1][i] = scalar - evals[j + 1][i + multiple]; + } + size *= 2; + } + evals + } + + /// Parallel eq table construction with optional scaling. + /// + /// Uses rayon to build large layers in parallel. Low-to-high construction: + /// processes `r` in reverse so that the first coordinate ends up as the MSB. + #[tracing::instrument(skip_all, name = "EqPolynomial::evals_parallel")] + #[inline] + pub fn evals_parallel(r: &[C], scaling_factor: Option) -> Vec + where + C: Copy + Send + Sync + Into, + F: Mul + SubAssign, + { + let final_size = r.len().pow2(); + let mut evals: Vec = unsafe_allocate_zero_vec(final_size); + let mut size = 1; + evals[0] = scaling_factor.unwrap_or(F::one()); + + for r in r.iter().rev() { + let (evals_left, evals_right) = evals.split_at_mut(size); + let (evals_right, _) = evals_right.split_at_mut(size); + + #[cfg(feature = "parallel")] + { + use rayon::prelude::*; + evals_left + .par_iter_mut() + .zip(evals_right.par_iter_mut()) + .for_each(|(x, y)| { + *y = *x * *r; + *x -= *y; + }); + } + + #[cfg(not(feature = "parallel"))] + { + for i in 0..size { + evals_right[i] = evals_left[i] * *r; + evals_left[i] -= evals_right[i]; + } + } + + size *= 2; + } + + evals + } +} + +impl crate::MultilinearEvaluation for EqPolynomial { + fn num_vars(&self) -> usize { + self.point.len() + } + + fn len(&self) -> usize { + 1 << self.point.len() + } + + fn evaluate(&self, point: &[F]) -> F { + EqPolynomial::evaluate(self, point) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use jolt_field::Field; + use 
jolt_field::Fr; + use num_traits::{One, Zero}; + use rand_chacha::ChaCha20Rng; + use rand_core::SeedableRng; + + fn index_to_bits(idx: usize, n: usize) -> Vec { + (0..n) + .map(|i| { + if (idx >> (n - 1 - i)) & 1 == 1 { + Fr::one() + } else { + Fr::zero() + } + }) + .collect() + } + + #[test] + fn sum_over_hypercube_is_one() { + let mut rng = ChaCha20Rng::seed_from_u64(42); + let n = 4; + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let eq = EqPolynomial::new(point); + let table = eq.evaluations(); + let sum: Fr = table.iter().copied().sum(); + assert_eq!(sum, Fr::one()); + } + + #[test] + fn evaluate_at_boolean_selects_entry() { + let mut rng = ChaCha20Rng::seed_from_u64(99); + let n = 3; + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let eq = EqPolynomial::new(point); + let table = eq.evaluations(); + + for (idx, &entry) in table.iter().enumerate() { + let bits = index_to_bits(idx, n); + let direct = eq.evaluate(&bits); + assert_eq!(direct, entry, "mismatch at index {idx}"); + } + } + + #[test] + fn evaluations_matches_evaluate_pointwise() { + let mut rng = ChaCha20Rng::seed_from_u64(7); + let n = 5; + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let eq = EqPolynomial::new(point); + let table = eq.evaluations(); + + for (idx, &entry) in table.iter().enumerate() { + let bits = index_to_bits(idx, n); + assert_eq!(entry, eq.evaluate(&bits)); + } + } + + #[test] + fn parallel_evaluations_sum_is_one() { + // num_vars=11 -> 2048 entries, above PAR_THRESHOLD=1024 + // Verifies the parallel path produces a valid eq table whose entries sum to 1. 
+ let mut rng = ChaCha20Rng::seed_from_u64(300); + let n = 11; + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let eq = EqPolynomial::new(point); + let table = eq.evaluations(); + + assert_eq!(table.len(), 1 << n); + let sum: Fr = table.iter().copied().sum(); + assert_eq!(sum, Fr::one()); + } + + #[test] + fn parallel_evaluations_inner_product_consistency() { + // Verifies that the inner product of two eq tables (which computes + // eq(r, s) = sum_x eq(x,r)*eq(x,s)) is consistent with evaluate(). + // This holds regardless of table ordering. + let mut rng = ChaCha20Rng::seed_from_u64(303); + let n = 11; + let r: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let s: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + + let eq_r = EqPolynomial::new(r.clone()); + let eq_s = EqPolynomial::new(s.clone()); + + let table_r = eq_r.evaluations(); + let table_s = eq_s.evaluations(); + + let inner_product: Fr = table_r + .iter() + .zip(table_s.iter()) + .map(|(&a, &b)| a * b) + .sum(); + let direct = eq_r.evaluate(&s); + assert_eq!(inner_product, direct); + } + + #[test] + fn parallel_sum_over_hypercube_is_one() { + let mut rng = ChaCha20Rng::seed_from_u64(301); + let n = 11; + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let eq = EqPolynomial::new(point); + let table = eq.evaluations(); + let sum: Fr = table.iter().copied().sum(); + assert_eq!(sum, Fr::one()); + } + + #[test] + fn evaluate_cross_verification_random_point() { + let mut rng = ChaCha20Rng::seed_from_u64(302); + let n = 6; + let r: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let eq = EqPolynomial::new(r); + let table = eq.evaluations(); + + // Pick a random non-Boolean evaluation point and verify via definition + let p: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let direct = eq.evaluate(&p); + + // Manual computation: sum over hypercube of eq(x,r) * eq(x,p) + // which equals eq(r,p) since sum_x eq(x,r)*eq(x,p) = eq(r,p) + let eq_p = 
EqPolynomial::new(p);
        let table_p = eq_p.evaluations();
        let via_tables: Fr = table.iter().zip(table_p.iter()).map(|(&a, &b)| a * b).sum();
        assert_eq!(direct, via_tables);
    }

    #[test]
    fn eq_at_boolean_point_is_one() {
        // eq(b, b) = 1 for any Boolean vector b ∈ {0,1}^n
        for n in 1..=5 {
            for idx in 0..(1 << n) {
                let bits = index_to_bits(idx, n);
                let eq = EqPolynomial::new(bits.clone());
                assert_eq!(
                    eq.evaluate(&bits),
                    Fr::one(),
                    "eq(b, b) != 1 for n={n}, idx={idx}"
                );
            }
        }
    }

    #[test]
    fn eq_at_distinct_boolean_points_is_zero() {
        let n = 3;
        for i in 0..(1 << n) {
            for j in 0..(1 << n) {
                if i == j {
                    continue;
                }
                let bi = index_to_bits(i, n);
                let bj = index_to_bits(j, n);
                let eq = EqPolynomial::new(bi);
                assert!(
                    eq.evaluate(&bj).is_zero(),
                    "eq(b_i, b_j) != 0 for i={i}, j={j}"
                );
            }
        }
    }

    #[test]
    fn evals_serial_matches_instance() {
        let mut rng = ChaCha20Rng::seed_from_u64(400);
        for n in 1..=8 {
            let r: Vec<Fr> = (0..n).map(|_| Fr::random(&mut rng)).collect();
            let instance = EqPolynomial::new(r.clone()).evaluations();
            let via_static = EqPolynomial::<Fr>::evals_serial(&r, None);
            assert_eq!(instance, via_static, "mismatch for n={n}");
        }
    }

    #[test]
    fn evals_parallel_matches_serial() {
        let mut rng = ChaCha20Rng::seed_from_u64(401);
        for n in [5, 10, 12] {
            let r: Vec<Fr> = (0..n).map(|_| Fr::random(&mut rng)).collect();
            let serial = EqPolynomial::<Fr>::evals_serial(&r, None);
            let parallel = EqPolynomial::<Fr>::evals_parallel(&r, None);
            assert_eq!(serial, parallel, "serial vs parallel mismatch for n={n}");
        }
    }

    #[test]
    fn evals_cached_prefix_consistency() {
        let mut rng = ChaCha20Rng::seed_from_u64(402);
        for n in 2..=10 {
            let r: Vec<Fr> = (0..n).map(|_| Fr::random(&mut rng)).collect();
            let cached = EqPolynomial::<Fr>::evals_cached(&r, None);
            assert_eq!(cached.len(), n + 1);
            assert_eq!(cached[0], vec![Fr::one()]);
            for i in 0..=n {
                assert_eq!(cached[i].len(), 1 << i);
                // Each cached prefix table must match a direct serial build
                // over the same prefix of r.
                let direct = EqPolynomial::<Fr>::evals_serial(&r[..i], None);
                assert_eq!(cached[i], direct, "cached[{i}] mismatch for n={n}");
            }
        }
    }

    #[test]
    fn evals_cached_rev_consistency() {
        let mut rng = ChaCha20Rng::seed_from_u64(403);
        for n in 2..=8 {
            let r: Vec<Fr> = (0..n).map(|_| Fr::random(&mut rng)).collect();
            let cached_rev = EqPolynomial::<Fr>::evals_cached_rev(&r, None);
            assert_eq!(cached_rev.len(), n + 1);
            assert_eq!(cached_rev[0], vec![Fr::one()]);
            for (j, table) in cached_rev.iter().enumerate() {
                assert_eq!(table.len(), 1 << j);
            }
            // The last entry should equal evals over all variables in reverse order
            let full_rev: Vec<Fr> = r.iter().rev().copied().collect();
            let full_table = EqPolynomial::<Fr>::evals_serial(&full_rev, None);
            // Sizes should match but the table is built differently
            assert_eq!(cached_rev[n].len(), full_table.len());
        }
    }

    #[test]
    fn mle_static_matches_instance_evaluate() {
        let mut rng = ChaCha20Rng::seed_from_u64(404);
        let n = 5;
        let x: Vec<Fr> = (0..n).map(|_| Fr::random(&mut rng)).collect();
        let y: Vec<Fr> = (0..n).map(|_| Fr::random(&mut rng)).collect();

        let via_instance = EqPolynomial::new(x.clone()).evaluate(&y);
        let via_static = EqPolynomial::<Fr>::mle(&x, &y);
        assert_eq!(via_instance, via_static);
    }

    #[test]
    fn zero_selector() {
        let mut rng = ChaCha20Rng::seed_from_u64(405);
        let n = 4;
        let r: Vec<Fr> = (0..n).map(|_| Fr::random(&mut rng)).collect();

        // zero_selector(r) = eq(r, 0) = Π (1 - r_i)
        let expected = r
            .iter()
            .fold(Fr::one(), |acc, &r_i| acc * (Fr::one() - r_i));
        let result = EqPolynomial::<Fr>::zero_selector(&r);
        assert_eq!(expected, result);
    }

    #[test]
    fn parallel_evaluations_pointwise_correctness() {
        // Verifies that the parallel path in evaluations() produces the correct
        // entry at every index — catches layout mismatches (blocked vs interleaved).
+ let mut rng = ChaCha20Rng::seed_from_u64(500); + let n = 12; // 4096 entries, well above PAR_THRESHOLD=1024 + let point: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let eq = EqPolynomial::new(point); + let table = eq.evaluations(); + assert_eq!(table.len(), 1 << n); + + for (idx, &entry) in table.iter().enumerate() { + let bits = index_to_bits(idx, n); + let expected = eq.evaluate(&bits); + assert_eq!(entry, expected, "mismatch at index {idx}"); + } + } + + #[test] + fn evals_with_scaling() { + let mut rng = ChaCha20Rng::seed_from_u64(406); + let n = 4; + let r: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let scale = Fr::from_u64(7); + + let unscaled = EqPolynomial::::evals_serial(&r, None); + let scaled = EqPolynomial::::evals_serial(&r, Some(scale)); + + for (u, s) in unscaled.iter().zip(scaled.iter()) { + assert_eq!(*u * scale, *s); + } + } +} diff --git a/crates/jolt-poly/src/eq_plus_one.rs b/crates/jolt-poly/src/eq_plus_one.rs new file mode 100644 index 000000000..edfd21e87 --- /dev/null +++ b/crates/jolt-poly/src/eq_plus_one.rs @@ -0,0 +1,328 @@ +//! Equality-plus-one polynomial for shift sumcheck. +//! +//! The MLE `eq+1(x, y)` evaluates to 1 when `y = x + 1` (as integers in +//! `[0, 2^l − 2]`) and 0 otherwise. There is no wrap-around: when `x` is all +//! ones (the maximum), the polynomial outputs 0 for every `y`. +//! +//! This is used in the Spartan shift sumcheck to relate polynomial evaluations +//! at consecutive cycles. +//! +//! Both `x` and `y` are in **big-endian** bit ordering (`point[0]` = MSB). + +use jolt_field::Field; + +use crate::thread::unsafe_allocate_zero_vec; +use crate::EqPolynomial; + +/// MLE evaluating to 1 iff `y = x + 1` (no wrap at `2^l − 1`). +/// +/// Stores a fixed point `x` in big-endian order. Call [`evaluate`](Self::evaluate) +/// to compute `eq+1(x, y)` at any `y`. +pub struct EqPlusOnePolynomial { + /// Fixed point (big-endian: `point[0]` = MSB). 
+ point: Vec, +} + +impl EqPlusOnePolynomial { + pub fn new(point: Vec) -> Self { + Self { point } + } + + /// Evaluates `eq+1(x, y)` at a single point `y` (big-endian). + /// + /// The identity decomposes by suffix length `k` of consecutive 1-bits in `x`: + /// - The bottom `k` bits of `x` are 1 and the corresponding bits of `y` are 0. + /// - Bit `k` flips: `x[k] = 0`, `y[k] = 1`. + /// - All higher bits match. + pub fn evaluate(&self, y: &[F]) -> F { + let l = self.point.len(); + let x = &self.point; + assert_eq!(y.len(), l, "point length mismatch"); + let one = F::one(); + + (0..l) + .map(|k| { + let lower_bits: F = (0..k) + .map(|i| x[l - 1 - i] * (one - y[l - 1 - i])) + .product(); + let flip = (one - x[l - 1 - k]) * y[l - 1 - k]; + let higher_bits: F = ((k + 1)..l) + .map(|i| { + x[l - 1 - i] * y[l - 1 - i] + (one - x[l - 1 - i]) * (one - y[l - 1 - i]) + }) + .product(); + lower_bits * flip * higher_bits + }) + .sum() + } + + /// Computes full evaluation tables `(eq_evals, eq_plus_one_evals)` over the + /// Boolean hypercube, where: + /// + /// - `eq_evals[j] = eq(r, j)` + /// - `eq_plus_one_evals[j] = eq+1(r, j)` + /// + /// Both tables are indexed in big-endian order: `j = 0` corresponds to + /// the all-zeros vertex. + /// + /// The `eq` table is built incrementally prefix-by-prefix. At each step + /// the `eq+1` contribution for the new bit position is derived from the + /// partial `eq` table and a product of the remaining `r` coordinates. + pub fn evals(r: &[F], scaling_factor: Option) -> (Vec, Vec) { + let ell = r.len(); + let size = 1usize << ell; + let mut eq_evals: Vec = unsafe_allocate_zero_vec(size); + eq_evals[0] = scaling_factor.unwrap_or(F::one()); + let mut eq_plus_one_evals: Vec = unsafe_allocate_zero_vec(size); + + // Build tables incrementally. After processing bit i, the eq table + // encodes a prefix of length i+1, stored at strided positions. + // + // At each step: + // 1. Derive eq+1 contributions from the current eq prefix. + // 2. 
Extend the eq table by one more variable r[i]. + for i in 0..ell { + let step = 1usize << (ell - i); + let half_step = step / 2; + + // r_lower_product = (1 - r[i]) · Π_{j > i} r[j] + let mut r_lower_product = F::one(); + for &x in r.iter().skip(i + 1) { + r_lower_product *= x; + } + r_lower_product *= F::one() - r[i]; + + // Fill eq+1 entries for bit position i. + let mut idx = half_step; + while idx < size { + eq_plus_one_evals[idx] = eq_evals[idx - half_step] * r_lower_product; + idx += step; + } + + // Extend eq table by variable r[i]. + // The eq table after i steps has 2^i nonzero entries at stride 2^(ell-i). + // After extension, it has 2^(i+1) entries at stride 2^(ell-i-1). + // Selected indices: 0, eq_step, 2·eq_step, ... where eq_step = 2^(ell-i-1). + // Pairs: (k, k+eq_step) → eq[k+eq_step] = eq[k]·r[i]; eq[k] -= eq[k+eq_step]. + let eq_step = 1usize << (ell - i - 1); + let mut k = 0; + while k < size { + let val = eq_evals[k] * r[i]; + eq_evals[k + eq_step] = val; + eq_evals[k] -= val; + k += eq_step * 2; + } + } + + (eq_evals, eq_plus_one_evals) + } +} + +/// Prefix-suffix decomposition of `eq+1` for sumcheck optimization. +/// +/// Decomposes `eq+1((r_hi, r_lo), (y_hi, y_lo))` into two rank-1 terms: +/// +/// ```text +/// prefix_0(y_lo) · suffix_0(y_hi) + prefix_1(y_lo) · suffix_1(y_hi) +/// ``` +/// +/// where `r = (r_hi, r_lo)` is split at the midpoint. This enables the first +/// half of the shift sumcheck to operate on √N-sized buffers rather than N. +/// +/// See (Appendix A). +pub struct EqPlusOnePrefixSuffix { + /// Evals of `eq+1(r_lo, j)` for `j ∈ {0,1}^{n/2}`. + pub prefix_0: Vec, + /// Evals of `eq(r_hi, j)` for `j ∈ {0,1}^{n/2}`. + pub suffix_0: Vec, + /// `is_max(r_lo) · is_min(j)` — nonzero only at `j = 0`. + pub prefix_1: Vec, + /// Evals of `eq+1(r_hi, j)` for `j ∈ {0,1}^{n/2}`. + pub suffix_1: Vec, +} + +impl EqPlusOnePrefixSuffix { + /// Creates the decomposition from a big-endian point `r`. 
+ /// + /// Splits at `r.len() / 2`: the first half is `r_hi`, the second is `r_lo`. + pub fn new(r: &[F]) -> Self { + let mid = r.len() / 2; + let (r_hi, r_lo) = r.split_at(mid); + + // is_max(r_lo) = eq((1,...,1), r_lo) = Π r_lo[i] + let ones: Vec = vec![F::one(); r_lo.len()]; + let is_max_eval = EqPolynomial::::mle(&ones, r_lo); + + let mut prefix_1 = vec![F::zero(); 1 << r_lo.len()]; + prefix_1[0] = is_max_eval; + + let (suffix_0, suffix_1) = EqPlusOnePolynomial::evals(r_hi, None); + + Self { + prefix_0: EqPlusOnePolynomial::evals(r_lo, None).1, + suffix_0, + prefix_1, + suffix_1, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use jolt_field::{Field, Fr}; + use num_traits::{One, Zero}; + use rand_chacha::ChaCha20Rng; + use rand_core::SeedableRng; + + fn index_to_bits(idx: usize, n: usize) -> Vec { + (0..n) + .map(|i| { + if (idx >> (n - 1 - i)) & 1 == 1 { + Fr::one() + } else { + Fr::zero() + } + }) + .collect() + } + + #[test] + fn successor_at_boolean_points() { + // For l=3, eq+1(x, y) = 1 iff y = x + 1 (no wrap at 7). + let l = 3; + for x_int in 0..(1 << l) { + let x_bits = index_to_bits(x_int, l); + let eq_plus_one = EqPlusOnePolynomial::new(x_bits); + + for y_int in 0..(1 << l) { + let y_bits = index_to_bits(y_int, l); + let val = eq_plus_one.evaluate(&y_bits); + if x_int < (1 << l) - 1 && y_int == x_int + 1 { + assert_eq!(val, Fr::one(), "eq+1({x_int}, {y_int}) should be 1"); + } else { + assert!(val.is_zero(), "eq+1({x_int}, {y_int}) should be 0"); + } + } + } + } + + #[test] + fn no_wraparound_at_max() { + // eq+1(all_ones, 0) = 0 (no wrap-around). 
+ let l = 4; + let x = vec![Fr::one(); l]; + let y = vec![Fr::zero(); l]; + let eq_plus_one = EqPlusOnePolynomial::new(x); + assert!(eq_plus_one.evaluate(&y).is_zero()); + } + + #[test] + fn evals_table_matches_pointwise() { + let mut rng = ChaCha20Rng::seed_from_u64(42); + let l = 4; + let r: Vec = (0..l).map(|_| Fr::random(&mut rng)).collect(); + + let (eq_evals, eq_plus_one_evals) = EqPlusOnePolynomial::evals(&r, None); + + let eq_poly = EqPolynomial::new(r.clone()); + let eq_plus_one_poly = EqPlusOnePolynomial::new(r); + + for idx in 0..(1 << l) { + let bits = index_to_bits(idx, l); + assert_eq!( + eq_evals[idx], + eq_poly.evaluate(&bits), + "eq mismatch at {idx}" + ); + assert_eq!( + eq_plus_one_evals[idx], + eq_plus_one_poly.evaluate(&bits), + "eq+1 mismatch at {idx}" + ); + } + } + + #[test] + fn evals_with_scaling() { + let mut rng = ChaCha20Rng::seed_from_u64(99); + let l = 3; + let r: Vec = (0..l).map(|_| Fr::random(&mut rng)).collect(); + let scale = Fr::from_u64(5); + + let (eq_unscaled, _) = EqPlusOnePolynomial::evals(&r, None); + let (eq_scaled, _) = EqPlusOnePolynomial::evals(&r, Some(scale)); + + for (u, s) in eq_unscaled.iter().zip(eq_scaled.iter()) { + assert_eq!(*u * scale, *s); + } + } + + #[test] + fn prefix_suffix_matches_direct() { + let mut rng = ChaCha20Rng::seed_from_u64(123); + let l = 4; + let r: Vec = (0..l).map(|_| Fr::random(&mut rng)).collect(); + + let eq_plus_one_direct = EqPlusOnePolynomial::new(r.clone()); + + let ps = EqPlusOnePrefixSuffix::new(&r); + + // Verify at a random evaluation point y = (y_hi, y_lo). 
+ let y: Vec = (0..l).map(|_| Fr::random(&mut rng)).collect(); + let (y_hi, y_lo) = y.split_at(l / 2); + + let p0_eval = crate::Polynomial::new(ps.prefix_0).evaluate(y_lo); + let s0_eval = crate::Polynomial::new(ps.suffix_0).evaluate(y_hi); + let p1_eval = crate::Polynomial::new(ps.prefix_1).evaluate(y_lo); + let s1_eval = crate::Polynomial::new(ps.suffix_1).evaluate(y_hi); + + let via_decomp = p0_eval * s0_eval + p1_eval * s1_eval; + let via_direct = eq_plus_one_direct.evaluate(&y); + assert_eq!(via_decomp, via_direct); + } + + #[test] + fn prefix_suffix_multiple_random_points() { + let mut rng = ChaCha20Rng::seed_from_u64(456); + for l in [4, 6, 8] { + let r: Vec = (0..l).map(|_| Fr::random(&mut rng)).collect(); + let direct = EqPlusOnePolynomial::new(r.clone()); + let ps = EqPlusOnePrefixSuffix::new(&r); + + for _ in 0..5 { + let y: Vec = (0..l).map(|_| Fr::random(&mut rng)).collect(); + let (y_hi, y_lo) = y.split_at(l / 2); + + let p0 = crate::Polynomial::new(ps.prefix_0.clone()).evaluate(y_lo); + let s0 = crate::Polynomial::new(ps.suffix_0.clone()).evaluate(y_hi); + let p1 = crate::Polynomial::new(ps.prefix_1.clone()).evaluate(y_lo); + let s1 = crate::Polynomial::new(ps.suffix_1.clone()).evaluate(y_hi); + + assert_eq!( + p0 * s0 + p1 * s1, + direct.evaluate(&y), + "decomposition mismatch for l={l}" + ); + } + } + } + + #[test] + fn eq_plus_one_sum_over_hypercube() { + // For random r, sum_y eq+1(r, y) should equal 1 - eq(r, max). + // Because eq+1 maps x → x+1 for x in [0, 2^l-2], so it covers + // all y in [1, 2^l-1], missing y=0 and hitting y=(2^l-1) only if + // x=(2^l-2). The sum should be 1 - Π r_i (the missing all-ones term). 
+ let mut rng = ChaCha20Rng::seed_from_u64(789); + let l = 5; + let r: Vec = (0..l).map(|_| Fr::random(&mut rng)).collect(); + + let (_, eq_plus_one_evals) = EqPlusOnePolynomial::evals(&r, None); + let sum: Fr = eq_plus_one_evals.iter().copied().sum(); + + let r_product: Fr = r.iter().copied().product(); + let expected = Fr::one() - r_product; + assert_eq!(sum, expected); + } +} diff --git a/crates/jolt-poly/src/identity.rs b/crates/jolt-poly/src/identity.rs new file mode 100644 index 000000000..f050fccec --- /dev/null +++ b/crates/jolt-poly/src/identity.rs @@ -0,0 +1,101 @@ +//! Identity polynomial evaluating to the integer index on the Boolean hypercube. + +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +/// Identity polynomial: $\widetilde{I}(x) = \sum_{i=0}^{2^n - 1} i \cdot \widetilde{eq}(x, i)$. +/// +/// At each Boolean hypercube point $b \in \{0,1\}^n$, this polynomial evaluates to the +/// integer whose binary representation is $b$ (most-significant bit first). Its multilinear +/// extension at an arbitrary point $r \in \mathbb{F}^n$ is: +/// $$\widetilde{I}(r_1, \ldots, r_n) = \sum_{i=1}^{n} r_i \cdot 2^{n-i}$$ +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct IdentityPolynomial { + num_vars: usize, +} + +impl IdentityPolynomial { + /// Creates an identity polynomial over $n$ variables. + pub fn new(num_vars: usize) -> Self { + Self { num_vars } + } + + pub fn num_vars(&self) -> usize { + self.num_vars + } + + /// Evaluates $\widetilde{I}(r) = \sum_{i=1}^{n} r_i \cdot 2^{n-i}$. + /// + /// Time: $O(n)$. No heap allocation. 
+ #[inline] + pub fn evaluate(&self, point: &[F]) -> F { + assert_eq!( + point.len(), + self.num_vars, + "point dimension must match num_vars" + ); + let n = self.num_vars; + point + .iter() + .enumerate() + .fold(F::zero(), |acc, (i, &r_i)| acc + r_i.mul_pow_2(n - 1 - i)) + } +} + +impl crate::MultilinearEvaluation for IdentityPolynomial { + fn num_vars(&self) -> usize { + self.num_vars + } + + fn len(&self) -> usize { + 1 << self.num_vars + } + + fn evaluate(&self, point: &[F]) -> F { + IdentityPolynomial::evaluate(self, point) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use jolt_field::Field; + use jolt_field::Fr; + use num_traits::{One, Zero}; + + #[test] + fn evaluate_at_boolean_points_returns_index() { + let n = 4; + let id = IdentityPolynomial::new(n); + + for idx in 0..(1usize << n) { + let bits: Vec = (0..n) + .map(|i| { + if (idx >> (n - 1 - i)) & 1 == 1 { + Fr::one() + } else { + Fr::zero() + } + }) + .collect(); + assert_eq!( + id.evaluate(&bits), + Fr::from_u64(idx as u64), + "mismatch at index {idx}" + ); + } + } + + #[test] + fn zero_vars() { + let id = IdentityPolynomial::new(0); + assert!(id.evaluate::(&[]).is_zero()); + } + + #[test] + fn single_var() { + let id = IdentityPolynomial::new(1); + assert!(id.evaluate(&[Fr::zero()]).is_zero()); + assert_eq!(id.evaluate(&[Fr::one()]), Fr::one()); + } +} diff --git a/crates/jolt-poly/src/lagrange.rs b/crates/jolt-poly/src/lagrange.rs new file mode 100644 index 000000000..66213f320 --- /dev/null +++ b/crates/jolt-poly/src/lagrange.rs @@ -0,0 +1,347 @@ +//! Lagrange interpolation utilities over integer domains. +//! +//! Provides building blocks for the univariate skip optimization in sumcheck +//! protocols. All functions are generic over [`Field`] and operate on +//! integer-indexed domains (symmetric or arbitrary). + +use jolt_field::Field; + +/// Evaluates all Lagrange basis polynomials $L_0(r), \ldots, L_{N-1}(r)$ over +/// the domain $\{s, s+1, \ldots, s+N-1\}$ where $s$ = `domain_start`. 
+/// +/// Uses the barycentric formula with $O(N^2)$ work for weight computation +/// and $O(N)$ per-element inversions. +/// +/// # Panics +/// Panics if `domain_size` is zero. +pub fn lagrange_evals(domain_start: i64, domain_size: usize, r: F) -> Vec { + assert!(domain_size > 0, "domain_size must be positive"); + + // Check if r coincides with a grid point (early exit) + let nodes: Vec = (0..domain_size) + .map(|k| F::from_i64(domain_start + k as i64)) + .collect(); + + for (i, &node) in nodes.iter().enumerate() { + if r == node { + let mut result = vec![F::zero(); domain_size]; + result[i] = F::one(); + return result; + } + } + + // Compute (r - x_0)(r - x_1)...(r - x_{N-1}) + let diffs: Vec = nodes.iter().map(|&x| r - x).collect(); + let full_product: F = diffs.iter().copied().product(); + + // Barycentric weights: w_i = 1 / prod_{j != i} (x_i - x_j) + // For consecutive integers {s, s+1, ..., s+N-1}, the denominator is + // prod_{j != i} (i - j) which equals (-1)^{N-1-i} * i! * (N-1-i)! + let mut weights = vec![F::one(); domain_size]; + for (i, wi) in weights.iter_mut().enumerate() { + for j in 0..domain_size { + if i != j { + let diff = (i as i64) - (j as i64); + *wi *= F::from_i64(diff); + } + } + *wi = wi.inverse().expect("Lagrange weights must be invertible"); + } + + // L_i(r) = full_product * w_i / (r - x_i) + let mut result = Vec::with_capacity(domain_size); + for i in 0..domain_size { + let diff_inv = diffs[i] + .inverse() + .expect("r should not coincide with a node"); + result.push(full_product * weights[i] * diff_inv); + } + + result +} + +/// Computes power sums $S_k = \sum_{t=-D}^{D} t^k$ for $k = 0, 1, \ldots, \text{num\_powers}-1$ +/// over the symmetric integer domain $\{-D, \ldots, D\}$ of size $2D+1$. +/// +/// Returns integer power sums as `i128`. Odd-power sums are zero by symmetry. +/// +/// Used by the verifier to check $\sum_{Y \in W} p(Y) = \text{claimed\_sum}$ +/// without evaluating $p$ at every domain point. 
+pub fn symmetric_power_sums(half_width: i64, num_powers: usize) -> Vec { + let mut sums = vec![0i128; num_powers]; + for t in -half_width..=half_width { + let mut power = 1i128; + for s in &mut sums { + *s += power; + power *= t as i128; + } + } + sums +} + +/// Polynomial multiplication in coefficient form. +/// +/// Given $p(x) = \sum a_i x^i$ and $q(x) = \sum b_j x^j$, returns +/// coefficients of $p \cdot q$ of length `a.len() + b.len() - 1`. +/// +/// Returns empty if either input is empty. +pub fn poly_mul(a: &[F], b: &[F]) -> Vec { + if a.is_empty() || b.is_empty() { + return Vec::new(); + } + let n = a.len() + b.len() - 1; + let mut result = vec![F::zero(); n]; + for (i, &ai) in a.iter().enumerate() { + for (j, &bj) in b.iter().enumerate() { + result[i + j] += ai * bj; + } + } + result +} + +/// Interpolates evaluations at consecutive integers to monomial coefficients. +/// +/// Given values $[f(s), f(s+1), \ldots, f(s+N-1)]$ where $s$ = `domain_start`, +/// returns the unique polynomial of degree $\leq N-1$ in coefficient form +/// $[c_0, c_1, \ldots, c_{N-1}]$ such that $p(x) = \sum c_i x^i$. +/// +/// Uses Newton's divided differences for $O(N^2)$ work. +/// +/// # Panics +/// Panics if `values` is empty. +pub fn interpolate_to_coeffs(domain_start: i64, values: &[F]) -> Vec { + let n = values.len(); + assert!(n > 0, "cannot interpolate zero values"); + + // Newton's divided differences: dd[i] = f[x_i, ..., x_{i-step}] + // For consecutive integer nodes x_k = s+k, the denominator is always `step`. + let mut dd = values.to_vec(); + for step in 1..n { + let denom_inv = F::from_i64(step as i64) + .inverse() + .expect("divided difference denominator must be invertible"); + for i in (step..n).rev() { + dd[i] = (dd[i] - dd[i - 1]) * denom_inv; + } + } + + // Convert Newton form (with nodes s, s+1, ...) to monomial form. + // p(x) = dd[0] + dd[1]*(x-s) + dd[2]*(x-s)*(x-s-1) + ... 
+ let mut coeffs = vec![F::zero(); n]; + // basis[k] = coefficient-form of (x-s)(x-s-1)...(x-s-k+1) + let mut basis = vec![F::zero(); n]; + basis[0] = F::one(); + + for (k, &dd_k) in dd.iter().enumerate() { + // Add dd[k] * basis to coeffs + for (i, &b) in basis.iter().enumerate().take(k + 1) { + coeffs[i] += dd_k * b; + } + // Update basis: multiply by (x - (s + k)) + if k < n - 1 { + let shift = F::from_i64(-(domain_start + k as i64)); + // basis = basis * (x + shift) = basis * x + basis * shift + // Process in reverse to avoid overwriting + for i in (1..=k + 1).rev() { + basis[i] = basis[i - 1] + basis[i] * shift; + } + basis[0] *= shift; + } + } + + coeffs +} + +#[cfg(test)] +mod tests { + use super::*; + use jolt_field::Fr; + use num_traits::{One, Zero}; + + #[test] + fn lagrange_evals_partition_of_unity() { + // Sum of all Lagrange basis values at any point must be 1 + let r = Fr::from_u64(42); + let evals = lagrange_evals(0, 5, r); + let sum: Fr = evals.iter().copied().sum(); + assert_eq!(sum, Fr::one()); + } + + #[test] + fn lagrange_evals_at_node_is_indicator() { + for i in 0..5u64 { + let r = Fr::from_u64(i); + let evals = lagrange_evals(0, 5, r); + for (j, &val) in evals.iter().enumerate() { + if j == i as usize { + assert_eq!(val, Fr::one(), "L_{j}({i}) should be 1"); + } else { + assert!(val.is_zero(), "L_{j}({i}) should be 0"); + } + } + } + } + + #[test] + fn lagrange_evals_symmetric_domain() { + // Domain {-2, -1, 0, 1, 2} + let r = Fr::from_u64(7); + let evals = lagrange_evals(-2, 5, r); + let sum: Fr = evals.iter().copied().sum(); + assert_eq!(sum, Fr::one()); + } + + #[test] + fn lagrange_evals_symmetric_at_node() { + // r = -1 is the second node in {-2, -1, 0, 1, 2} + let r = Fr::from_i64(-1); + let evals = lagrange_evals(-2, 5, r); + assert_eq!(evals[1], Fr::one()); + for (i, &val) in evals.iter().enumerate() { + if i != 1 { + assert!(val.is_zero()); + } + } + } + + #[test] + fn symmetric_power_sums_basic() { + // Domain {-1, 0, 1}: S_0 = 3, 
S_1 = 0, S_2 = 2 + let sums = symmetric_power_sums(1, 4); + assert_eq!(sums[0], 3); + assert_eq!(sums[1], 0); // symmetric + assert_eq!(sums[2], 2); // (-1)^2 + 0 + 1^2 + assert_eq!(sums[3], 0); // symmetric + } + + #[test] + fn symmetric_power_sums_width_2() { + // Domain {-2, -1, 0, 1, 2}: S_0 = 5, S_1 = 0, S_2 = 10 + let sums = symmetric_power_sums(2, 3); + assert_eq!(sums[0], 5); + assert_eq!(sums[1], 0); + assert_eq!(sums[2], 10); // 4 + 1 + 0 + 1 + 4 + } + + #[test] + fn poly_mul_basic() { + // (1 + 2x) * (3 + x) = 3 + 7x + 2x^2 + let a = [Fr::from_u64(1), Fr::from_u64(2)]; + let b = [Fr::from_u64(3), Fr::from_u64(1)]; + let c = poly_mul(&a, &b); + assert_eq!(c.len(), 3); + assert_eq!(c[0], Fr::from_u64(3)); + assert_eq!(c[1], Fr::from_u64(7)); + assert_eq!(c[2], Fr::from_u64(2)); + } + + #[test] + fn poly_mul_empty() { + let a: [Fr; 0] = []; + let b = [Fr::from_u64(1)]; + assert!(poly_mul(&a, &b).is_empty()); + } + + #[test] + fn interpolate_to_coeffs_constant() { + // f(0) = f(1) = f(2) = 5 → p(x) = 5 + let vals = [Fr::from_u64(5), Fr::from_u64(5), Fr::from_u64(5)]; + let coeffs = interpolate_to_coeffs(0, &vals); + assert_eq!(coeffs[0], Fr::from_u64(5)); + assert!(coeffs[1].is_zero()); + assert!(coeffs[2].is_zero()); + } + + #[test] + fn interpolate_to_coeffs_linear() { + // f(0) = 1, f(1) = 3 → p(x) = 1 + 2x + let vals = [Fr::from_u64(1), Fr::from_u64(3)]; + let coeffs = interpolate_to_coeffs(0, &vals); + assert_eq!(coeffs[0], Fr::from_u64(1)); + assert_eq!(coeffs[1], Fr::from_u64(2)); + } + + #[test] + fn interpolate_to_coeffs_quadratic() { + // f(0) = 1, f(1) = 4, f(2) = 11 → p(x) = 1 + x + 2x^2 + // p(0)=1, p(1)=1+1+2=4, p(2)=1+2+8=11 ✓ + let vals = [Fr::from_u64(1), Fr::from_u64(4), Fr::from_u64(11)]; + let coeffs = interpolate_to_coeffs(0, &vals); + // Verify by evaluating at each point + for (i, &expected) in vals.iter().enumerate() { + let x = Fr::from_u64(i as u64); + let mut val = Fr::zero(); + let mut x_pow = Fr::one(); + for &c in &coeffs { + 
val += c * x_pow; + x_pow *= x; + } + assert_eq!(val, expected, "mismatch at x={i}"); + } + } + + #[test] + fn interpolate_to_coeffs_symmetric_domain() { + // Domain {-1, 0, 1}: f(-1)=2, f(0)=1, f(1)=2 → p(x) = 1 + x^2 + let vals = [Fr::from_u64(2), Fr::from_u64(1), Fr::from_u64(2)]; + let coeffs = interpolate_to_coeffs(-1, &vals); + + // Verify at each domain point + for (k, &expected) in vals.iter().enumerate() { + let x = Fr::from_i64(-1 + k as i64); + let mut val = Fr::zero(); + let mut x_pow = Fr::one(); + for &c in &coeffs { + val += c * x_pow; + x_pow *= x; + } + assert_eq!(val, expected, "mismatch at x={}", -1 + k as i64); + } + } + + #[test] + fn interpolate_roundtrip_with_poly_mul() { + // Interpolate, multiply by (x - 5), check evaluations + let vals = [Fr::from_u64(3), Fr::from_u64(7), Fr::from_u64(13)]; + let coeffs = interpolate_to_coeffs(0, &vals); + let linear = [Fr::from_i64(-5), Fr::one()]; // (x - 5) + let product = poly_mul(&coeffs, &linear); + + // Verify product at x = 0,1,2 + for (i, &f_val) in vals.iter().enumerate() { + let x = Fr::from_u64(i as u64); + let mut val = Fr::zero(); + let mut x_pow = Fr::one(); + for &c in &product { + val += c * x_pow; + x_pow *= x; + } + let expected = f_val * (x - Fr::from_u64(5)); + assert_eq!(val, expected, "product mismatch at x={i}"); + } + } + + #[test] + fn lagrange_evals_agrees_with_interpolation() { + // Verify that lagrange_evals computes the same as interpolating + // indicator values and evaluating at r + let r = Fr::from_u64(17); + let domain_start = -3i64; + let domain_size = 7; + let evals = lagrange_evals(domain_start, domain_size, r); + + for i in 0..domain_size { + // Indicator values: 1 at position i, 0 elsewhere + let mut indicator = vec![Fr::zero(); domain_size]; + indicator[i] = Fr::one(); + let coeffs = interpolate_to_coeffs(domain_start, &indicator); + let mut val = Fr::zero(); + let mut x_pow = Fr::one(); + for &c in &coeffs { + val += c * x_pow; + x_pow *= r; + } + 
assert_eq!(evals[i], val, "L_{i}(17) mismatch"); + } + } +} diff --git a/crates/jolt-poly/src/lib.rs b/crates/jolt-poly/src/lib.rs new file mode 100644 index 000000000..308d37d22 --- /dev/null +++ b/crates/jolt-poly/src/lib.rs @@ -0,0 +1,39 @@ +//! Polynomial types and operations for multilinear, univariate, and +//! specialized polynomials. Backend-agnostic and reusable outside Jolt. +//! +//! This crate provides the core polynomial abstractions used throughout +//! the Jolt zkVM proving system: +//! +//! - [`Polynomial`]: Evaluation table over the Boolean hypercube, generic over +//! scalar type (`Field` for dense, primitives like `u8`/`bool` for compact) +//! - [`EqPolynomial`]: Equality polynomial for sumcheck evaluation +//! - [`UnivariatePoly`]: Coefficient-form univariate polynomial with Lagrange interpolation +//! - [`CompressedPoly`]: Compressed univariate with the linear term omitted (for proof size) +//! - [`IdentityPolynomial`]: Maps hypercube points to their integer index + +mod binding; +mod compressed_univariate; +mod cpu_polynomial; +mod eq; +mod eq_plus_one; +mod identity; +pub mod lagrange; +mod lt; +pub mod math; +mod multilinear; +mod one_hot; +mod source; +pub mod thread; +mod univariate; + +pub use binding::BindingOrder; +pub use compressed_univariate::CompressedPoly; +pub use cpu_polynomial::Polynomial; +pub use eq::EqPolynomial; +pub use eq_plus_one::{EqPlusOnePolynomial, EqPlusOnePrefixSuffix}; +pub use identity::IdentityPolynomial; +pub use lt::LtPolynomial; +pub use multilinear::{MultilinearBinding, MultilinearEvaluation}; +pub use one_hot::OneHotPolynomial; +pub use source::{MultilinearPoly, RlcSource}; +pub use univariate::{UnivariatePoly, UnivariatePolynomial}; diff --git a/crates/jolt-poly/src/lt.rs b/crates/jolt-poly/src/lt.rs new file mode 100644 index 000000000..85260a5e8 --- /dev/null +++ b/crates/jolt-poly/src/lt.rs @@ -0,0 +1,388 @@ +//! Less-than polynomial for value accumulation sumchecks. +//! +//! 
The MLE `LT(x, y)` evaluates to 1 on Boolean inputs when `x < y` as +//! integers and 0 otherwise. Its multilinear extension is: +//! +//! $$\text{LT}(x, y) = \sum_{i} (1 - x_i) \cdot y_i \cdot \text{eq}(x_{i+1:}, y_{i+1:})$$ +//! +//! where the sum runs from MSB to LSB (big-endian bit ordering). +//! +//! Used in the register/RAM value evaluation sumcheck to accumulate writes +//! that occurred before a given cycle point. +//! +//! # Split optimization +//! +//! Rather than materializing the full `2^n` table and binding it each round +//! (O(n·2^n) total work, O(2^n) memory), `LtPolynomial` splits the point +//! `r` at the midpoint into `(r_hi, r_lo)` and stores three √N-sized tables: +//! +//! ```text +//! LT(j, r) = LT(j_hi, r_hi) + eq(j_hi, r_hi) · LT(j_lo, r_lo) +//! ``` +//! +//! where `j = (j_hi, j_lo)`. Binding proceeds HighToLow: first all hi vars +//! (shrinking `lt_hi` and `eq_hi`), then all lo vars (shrinking `lt_lo`). +//! Total memory stays at 3 · √N throughout. + +use jolt_field::Field; + +use crate::EqPolynomial; + +/// Split less-than polynomial for efficient sumcheck binding. +/// +/// Stores three sub-tables of size ≤ √N each, reconstructing full-table +/// values on demand via `LT[j] = lt_hi[j_hi] + eq_hi[j_hi] · lt_lo[j_lo]`. +/// +/// Supports HighToLow binding only (MSB first). +pub struct LtPolynomial { + lt_lo: Vec, + lt_hi: Vec, + eq_hi: Vec, + n_lo_vars: usize, + n_hi_vars: usize, +} + +impl LtPolynomial { + /// Creates a split LT polynomial for the fixed point `r` (big-endian). + /// + /// Splits at `r.len() / 2`: the first half is `r_hi`, the second is `r_lo`. + /// For odd-length `r`, `r_hi` gets the extra variable. + pub fn new(r: &[F]) -> Self { + let mid = r.len() / 2; + let (r_hi, r_lo) = r.split_at(r.len() - mid); + + Self { + lt_lo: lt_evals(r_lo), + lt_hi: lt_evals(r_hi), + eq_hi: EqPolynomial::new(r_hi.to_vec()).evaluations(), + n_lo_vars: r_lo.len(), + n_hi_vars: r_hi.len(), + } + } + + /// Total number of remaining variables. 
+ #[inline] + pub fn num_vars(&self) -> usize { + self.n_hi_vars + self.n_lo_vars + } + + /// Effective table size `2^num_vars`. + #[inline] + pub fn len(&self) -> usize { + self.lt_hi.len() * self.lt_lo.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.lt_hi.is_empty() + } + + /// Reconstructs `LT[idx]` from the split tables. + #[inline] + fn get(&self, idx: usize) -> F { + let lo_size = self.lt_lo.len(); + let i_hi = idx / lo_size; + let i_lo = idx % lo_size; + self.lt_hi[i_hi] + self.eq_hi[i_hi] * self.lt_lo[i_lo] + } + + /// Returns `(LT[j], LT[j + half])` for HighToLow sumcheck pairing. + /// + /// In the hi-binding phase, the pairing splits across hi-table halves. + /// In the lo-binding phase (all hi vars bound), it splits across lo-table halves. + #[inline] + pub fn sumcheck_eval_pair(&self, j: usize) -> (F, F) { + let half = self.len() / 2; + (self.get(j), self.get(j + half)) + } + + /// Binds the MSB (HighToLow), halving the effective table size. + pub fn bind(&mut self, challenge: F) { + if self.n_hi_vars > 0 { + bind_in_place(&mut self.lt_hi, challenge); + bind_in_place(&mut self.eq_hi, challenge); + self.n_hi_vars -= 1; + } else { + assert!(self.n_lo_vars > 0, "no variables left to bind"); + bind_in_place(&mut self.lt_lo, challenge); + self.n_lo_vars -= 1; + } + } + + /// Materializes the full `2^n` evaluation table `[LT(0, r), ..., LT(2^n - 1, r)]`. + /// + /// Big-endian index order: `j = 0` corresponds to the all-zeros vertex. + pub fn evaluations(r: &[F]) -> Vec { + lt_evals(r) + } + + /// Evaluates `LT(x, r)` at a single point without materializing the full table. + /// + /// Computes `Σ_i (1 - x_i) · r_i · eq(x[0..i], r[0..i])` iteratively + /// from MSB to LSB, accumulating the prefix eq product. + /// + /// Both `x` and `r` are big-endian. Time: O(n). Space: O(1). 
+ pub fn evaluate(x: &[F], r: &[F]) -> F { + assert_eq!(x.len(), r.len(), "LT point dimension mismatch"); + let mut lt = F::zero(); + let mut eq_prefix = F::one(); + for (&xi, &ri) in x.iter().zip(r.iter()) { + lt += (F::one() - xi) * ri * eq_prefix; + eq_prefix *= xi * ri + (F::one() - xi) * (F::one() - ri); + } + lt + } +} + +/// Materializes `[LT(0, r), LT(1, r), ..., LT(2^n - 1, r)]` in big-endian order. +/// +/// Uses an in-place doubling construction. For each bit position `i` (LSB to MSB): +/// - Left half `x`: `x' = x + r_i - x·r_i` (accumulates `(1-x_i)·r_i·eq_suffix`) +/// - Right half `y`: `y' = x·r_i` (propagates eq term through x_i=1) +/// +/// Time: O(n·2^n). Space: O(2^n). +fn lt_evals(r: &[F]) -> Vec { + let n = r.len(); + let mut evals = vec![F::zero(); 1usize << n]; + for (i, &ri) in r.iter().rev().enumerate() { + let (left, right) = evals.split_at_mut(1 << i); + left.iter_mut().zip(right.iter_mut()).for_each(|(x, y)| { + *y = *x * ri; + *x += ri - *y; + }); + } + evals +} + +/// In-place HighToLow bind: `v[j] = v[j] + challenge · (v[j+half] - v[j])`. +#[inline] +fn bind_in_place(v: &mut Vec, challenge: F) { + let half = v.len() / 2; + for j in 0..half { + let lo = v[j]; + let hi = v[j + half]; + v[j] = lo + challenge * (hi - lo); + } + v.truncate(half); +} + +#[cfg(test)] +mod tests { + use super::*; + use jolt_field::{Field, Fr}; + use num_traits::{One, Zero}; + use rand_chacha::ChaCha20Rng; + use rand_core::SeedableRng; + + fn index_to_bits(idx: usize, n: usize) -> Vec { + (0..n) + .map(|i| { + if (idx >> (n - 1 - i)) & 1 == 1 { + Fr::one() + } else { + Fr::zero() + } + }) + .collect() + } + + #[test] + fn boolean_correctness() { + // LT(x, r) = 1 iff x < r on Boolean inputs. 
+ for n in 1..=5 { + for r_int in 0..(1u64 << n) { + let r_bits = index_to_bits(r_int as usize, n); + let table = LtPolynomial::evaluations(&r_bits); + + for x_int in 0..(1u64 << n) { + let expected = if x_int < r_int { Fr::one() } else { Fr::zero() }; + assert_eq!( + table[x_int as usize], expected, + "LT({x_int}, {r_int}) wrong for n={n}" + ); + } + } + } + } + + #[test] + fn evaluations_matches_inline() { + let mut rng = ChaCha20Rng::seed_from_u64(42); + for n in 2..=6 { + let r: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let table = LtPolynomial::evaluations(&r); + + for (idx, &entry) in table.iter().enumerate() { + let x = index_to_bits(idx, n); + let inline = LtPolynomial::evaluate(&x, &r); + assert_eq!(entry, inline, "mismatch at idx={idx}, n={n}"); + } + } + } + + #[test] + fn split_matches_full_table() { + let mut rng = ChaCha20Rng::seed_from_u64(99); + for n in 2..=8 { + let r: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let full_table = LtPolynomial::evaluations(&r); + let split = LtPolynomial::new(&r); + + assert_eq!(split.len(), full_table.len()); + for (j, &expected) in full_table.iter().enumerate() { + assert_eq!(split.get(j), expected, "split mismatch at j={j}, n={n}"); + } + } + } + + #[test] + fn sumcheck_eval_pair_matches_full_table() { + let mut rng = ChaCha20Rng::seed_from_u64(123); + for n in 2..=7 { + let r: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let full_table = LtPolynomial::evaluations(&r); + let split = LtPolynomial::new(&r); + + let half = full_table.len() / 2; + for (j, (&expected_lo, &expected_hi)) in full_table[..half] + .iter() + .zip(full_table[half..].iter()) + .enumerate() + { + let (lo, hi) = split.sumcheck_eval_pair(j); + assert_eq!(lo, expected_lo, "lo mismatch at j={j}, n={n}"); + assert_eq!(hi, expected_hi, "hi mismatch at j={j}, n={n}"); + } + } + } + + #[test] + fn sequential_bind_converges() { + // Bind all variables → single scalar = evaluate(challenges, r). 
+ let mut rng = ChaCha20Rng::seed_from_u64(200); + for n in 2..=8 { + let r: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let challenges: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + + let mut split = LtPolynomial::new(&r); + for &c in &challenges { + split.bind(c); + } + + assert_eq!(split.num_vars(), 0); + assert_eq!(split.len(), 1); + let final_val = split.get(0); + + let expected = LtPolynomial::evaluate(&challenges, &r); + assert_eq!(final_val, expected, "bind convergence failed for n={n}"); + } + } + + #[test] + fn bind_matches_full_table_bind() { + // Verify that binding the split matches binding the full table. + let mut rng = ChaCha20Rng::seed_from_u64(300); + for n in 3..=7 { + let r: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let challenge = Fr::random(&mut rng); + + let mut split = LtPolynomial::new(&r); + split.bind(challenge); + + let mut full = LtPolynomial::evaluations(&r); + bind_in_place(&mut full, challenge); + + assert_eq!(split.len(), full.len()); + for (j, &expected) in full.iter().enumerate() { + assert_eq!(split.get(j), expected, "post-bind mismatch at j={j}, n={n}"); + } + } + } + + #[test] + fn multi_round_bind_matches_full_table() { + // Bind several rounds and verify each round matches the full table. 
+ let mut rng = ChaCha20Rng::seed_from_u64(400); + let n = 6; + let r: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let challenges: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + + let mut split = LtPolynomial::new(&r); + let mut full = LtPolynomial::evaluations(&r); + + for (round, &c) in challenges.iter().enumerate() { + split.bind(c); + bind_in_place(&mut full, c); + + assert_eq!(split.len(), full.len(), "size mismatch after round {round}"); + for (j, &expected) in full.iter().enumerate() { + assert_eq!( + split.get(j), + expected, + "mismatch at j={j} after round {round}" + ); + } + } + } + + #[test] + fn inline_evaluate_matches_table() { + let mut rng = ChaCha20Rng::seed_from_u64(500); + for n in 2..=6 { + let x: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let r: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + + let via_table = { + let table = LtPolynomial::evaluations(&r); + let eq_evals = EqPolynomial::new(x.clone()).evaluations(); + table + .iter() + .zip(eq_evals.iter()) + .map(|(&t, &e)| t * e) + .sum::() + }; + + let via_inline = LtPolynomial::evaluate(&x, &r); + assert_eq!(via_table, via_inline, "inline vs table mismatch for n={n}"); + } + } + + #[test] + fn sum_over_hypercube() { + // Σ_x LT(x, r) = r interpreted as an integer in [0, 2^n). + // Actually: Σ_x LT(x, r) for Boolean x gives the number of x < r, + // but for random r this is a multilinear extension. + // Simple check: for Boolean r, the sum should equal r_int. + for n in 1..=5 { + for r_int in 0..(1u64 << n) { + let r_bits = index_to_bits(r_int as usize, n); + let table = LtPolynomial::evaluations(&r_bits); + let sum: Fr = table.iter().copied().sum(); + assert_eq!( + sum, + Fr::from_u64(r_int), + "hypercube sum wrong for r={r_int}, n={n}" + ); + } + } + } + + #[test] + fn odd_num_vars() { + // Verify split works correctly when n is odd (hi gets extra var). 
+ let mut rng = ChaCha20Rng::seed_from_u64(600); + for n in [3, 5, 7] { + let r: Vec = (0..n).map(|_| Fr::random(&mut rng)).collect(); + let full_table = LtPolynomial::evaluations(&r); + let split = LtPolynomial::new(&r); + + let mid = n / 2; + assert_eq!(split.n_hi_vars, n - mid); + assert_eq!(split.n_lo_vars, mid); + + for (j, &expected) in full_table.iter().enumerate() { + assert_eq!(split.get(j), expected, "odd split mismatch at j={j}, n={n}"); + } + } + } +} diff --git a/crates/jolt-poly/src/math.rs b/crates/jolt-poly/src/math.rs new file mode 100644 index 000000000..754e40692 --- /dev/null +++ b/crates/jolt-poly/src/math.rs @@ -0,0 +1,58 @@ +//! Bit-manipulation utilities on `usize`. + +pub trait Math { + /// Returns `2^self`. + fn pow2(self) -> usize; + /// Returns `floor(log2(self))`. + fn log_2(self) -> usize; +} + +impl Math for usize { + #[inline] + fn pow2(self) -> usize { + 1usize << self + } + + fn log_2(self) -> usize { + assert_ne!(self, 0); + if self.is_power_of_two() { + (1usize.leading_zeros() - self.leading_zeros()) as usize + } else { + (0usize.leading_zeros() - self.leading_zeros()) as usize + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn pow2_values() { + assert_eq!(0.pow2(), 1); + assert_eq!(1.pow2(), 2); + assert_eq!(10.pow2(), 1024); + assert_eq!(20.pow2(), 1_048_576); + } + + #[test] + fn log_2_powers_of_two() { + assert_eq!(1.log_2(), 0); + assert_eq!(2.log_2(), 1); + assert_eq!(4.log_2(), 2); + assert_eq!(1024.log_2(), 10); + } + + #[test] + fn log_2_non_powers() { + assert_eq!(3.log_2(), 2); + assert_eq!(5.log_2(), 3); + assert_eq!(1023.log_2(), 10); + } + + #[test] + #[should_panic(expected = "assertion")] + fn log_2_zero_panics() { + let _ = 0usize.log_2(); + } +} diff --git a/crates/jolt-poly/src/multilinear.rs b/crates/jolt-poly/src/multilinear.rs new file mode 100644 index 000000000..e977ed6a9 --- /dev/null +++ b/crates/jolt-poly/src/multilinear.rs @@ -0,0 +1,42 @@ +//! 
Computation traits for multilinear polynomials. +//! +//! These traits define the two core operations on multilinear polynomials +//! without coupling to data layout or storage. Concrete types like +//! [`Polynomial`](crate::Polynomial) implement whichever traits they support, +//! and downstream code (commitment schemes, sumcheck) is generic over these +//! interfaces — enabling different backends (CPU, GPU) behind the same API. + +use jolt_field::Field; + +/// Multilinear polynomial evaluation at an arbitrary point. +/// +/// Any multilinear polynomial $f: \mathbb{F}^n \to \mathbb{F}$ is uniquely +/// determined by its $2^n$ evaluations on the Boolean hypercube. This trait +/// exposes point evaluation and dimensional metadata without prescribing how +/// the evaluations are stored. +pub trait MultilinearEvaluation: Send + Sync { + /// Number of variables $n$. The polynomial has $2^n$ evaluations. + fn num_vars(&self) -> usize; + + /// Number of evaluations, equal to $2^n$. + fn len(&self) -> usize; + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Evaluates the polynomial at `point` $\in \mathbb{F}^n$ using the + /// multilinear extension formula: + /// $$f(r) = \sum_{x \in \{0,1\}^n} f(x) \cdot \widetilde{eq}(x, r)$$ + fn evaluate(&self, point: &[F]) -> F; +} + +/// In-place variable binding — the core sumcheck operation. +/// +/// Fixes the first variable to `scalar`, halving the evaluation table: +/// $$g(x_2, \ldots, x_n) = (1 - s) \cdot f(0, x_2, \ldots, x_n) + s \cdot f(1, x_2, \ldots, x_n)$$ +/// +/// After calling `bind`, `num_vars` decreases by 1 and `len` halves. +pub trait MultilinearBinding: Send + Sync { + fn bind(&mut self, scalar: F); +} diff --git a/crates/jolt-poly/src/one_hot.rs b/crates/jolt-poly/src/one_hot.rs new file mode 100644 index 000000000..2a569fbb4 --- /dev/null +++ b/crates/jolt-poly/src/one_hot.rs @@ -0,0 +1,259 @@ +//! One-hot multilinear polynomial — sparse representation where each row has +//! 
at most one nonzero entry with value 1. +//! +//! Used for Jolt's RA (random access) lookup index polynomials, where each +//! cycle selects exactly one of `k` possible values. Storing the hot index +//! per row instead of a dense `T × k` evaluation table reduces memory by +//! a factor of `k` and enables ~254× faster commitment via generator lookup +//! instead of full MSM. + +use jolt_field::Field; + +use crate::source::MultilinearPoly; + +/// Sparse multilinear polynomial where each row has at most one nonzero +/// entry, and that entry is always `F::one()`. +/// +/// The polynomial represents a `(T × k)` evaluation table where `T` is the +/// number of rows (cycles) and `k` is the number of columns. Row `i` has +/// value 1 at column `indices[i]` and 0 elsewhere. `None` means the entire +/// row is zero. +/// +/// `num_vars = log2(T * k)` where `T * k` must be a power of two. +#[derive(Clone, Debug)] +pub struct OneHotPolynomial { + k: usize, + indices: Vec>, + num_vars: usize, +} + +impl OneHotPolynomial { + /// Creates a one-hot polynomial from column indices. + /// + /// # Panics + /// + /// Panics if `k * indices.len()` is not a power of two. + pub fn new(k: usize, indices: Vec>) -> Self { + let total = k * indices.len(); + assert!( + total.is_power_of_two(), + "k * num_rows must be a power of two, got {total}" + ); + let num_vars = total.trailing_zeros() as usize; + Self { + k, + indices, + num_vars, + } + } + + #[inline] + pub fn k(&self) -> usize { + self.k + } + + #[inline] + pub fn indices(&self) -> &[Option] { + &self.indices + } + + #[inline] + pub fn num_rows(&self) -> usize { + self.indices.len() + } + + /// Number of variables $n$. The polynomial has $2^n$ evaluations. + /// + /// Inherent method avoids trait disambiguation since [`MultilinearPoly`] + /// is generic over `F`. 
+ #[inline] + pub fn num_vars(&self) -> usize { + self.num_vars + } +} + +impl MultilinearPoly for OneHotPolynomial { + #[inline] + fn num_vars(&self) -> usize { + self.num_vars + } + + fn evaluate(&self, point: &[F]) -> F { + assert_eq!(point.len(), self.num_vars); + let eq_evals = crate::EqPolynomial::new(point.to_vec()).evaluations(); + let mut result = F::zero(); + for (row, &opt_col) in self.indices.iter().enumerate() { + if let Some(col) = opt_col { + result += eq_evals[row * self.k + col as usize]; + } + } + result + } + + fn for_each_row(&self, sigma: usize, f: &mut dyn FnMut(usize, &[F])) { + let num_cols = 1usize << sigma; + let total_len = 1usize << self.num_vars; + let num_rows = total_len / num_cols; + + // Pre-index nonzero entries by matrix row. + let mut row_hot_cols: Vec> = vec![Vec::new(); num_rows]; + for (cycle, &opt_col) in self.indices.iter().enumerate() { + if let Some(col) = opt_col { + let flat = cycle * self.k + col as usize; + row_hot_cols[flat / num_cols].push(flat % num_cols); + } + } + + let mut buf = vec![F::zero(); num_cols]; + for (row_idx, cols) in row_hot_cols.into_iter().enumerate() { + buf.fill(F::zero()); + for c in cols { + buf[c] = F::one(); + } + f(row_idx, &buf); + } + } + + /// O(T) sparse fold — accumulates `left[row]` into `result[col]` only at + /// nonzero positions, avoiding the O(T × K) dense iteration. 
+ fn fold_rows(&self, left: &[F], sigma: usize) -> Vec { + let num_cols = 1usize << sigma; + let mut result = vec![F::zero(); num_cols]; + for (cycle, &opt_col) in self.indices.iter().enumerate() { + if let Some(col) = opt_col { + let flat = cycle * self.k + col as usize; + result[flat % num_cols] += left[flat / num_cols]; + } + } + result + } + + #[inline] + fn is_sparse(&self) -> bool { + true + } + + fn for_each_nonzero(&self, f: &mut dyn FnMut(usize, F)) { + for (cycle, &opt_col) in self.indices.iter().enumerate() { + if let Some(col) = opt_col { + f(cycle * self.k + col as usize, F::one()); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::Polynomial; + use jolt_field::Fr; + use num_traits::Zero; + use rand_chacha::ChaCha20Rng; + use rand_core::{RngCore, SeedableRng}; + + fn make_one_hot(k: usize, indices: &[Option]) -> OneHotPolynomial { + OneHotPolynomial::new(k, indices.to_vec()) + } + + fn to_dense(oh: &OneHotPolynomial) -> Polynomial { + let total = 1usize << oh.num_vars; + let mut table = vec![F::zero(); total]; + for (row, &opt_col) in oh.indices.iter().enumerate() { + if let Some(col) = opt_col { + table[row * oh.k + col as usize] = F::one(); + } + } + Polynomial::new(table) + } + + #[test] + fn evaluate_matches_dense() { + let mut rng = ChaCha20Rng::seed_from_u64(1); + let k = 4; + let indices: Vec> = (0..4) + .map(|_| Some((rng.next_u32() % k as u32) as u8)) + .collect(); + let oh = make_one_hot(k, &indices); + let dense: Polynomial = to_dense(&oh); + let nv = oh.num_vars(); + + for _ in 0..5 { + let point: Vec = (0..nv).map(|_| Fr::random(&mut rng)).collect(); + assert_eq!(oh.evaluate(&point), dense.evaluate(&point)); + } + } + + #[test] + fn fold_rows_matches_dense() { + let mut rng = ChaCha20Rng::seed_from_u64(2); + let k = 8; + let n_rows = 8; + let indices: Vec> = (0..n_rows) + .map(|i| { + if i % 3 == 0 { + None + } else { + Some((rng.next_u32() % k as u32) as u8) + } + }) + .collect(); + let oh = make_one_hot(k, 
&indices); + let dense: Polynomial = to_dense(&oh); + + let sigma = 3; + let num_rows_matrix = (1usize << oh.num_vars()) >> sigma; + let left: Vec = (0..num_rows_matrix).map(|_| Fr::random(&mut rng)).collect(); + + assert_eq!(oh.fold_rows(&left, sigma), dense.fold_rows(&left, sigma)); + } + + #[test] + fn for_each_row_matches_dense() { + let k = 4; + let indices = vec![Some(0), Some(3), None, Some(1)]; + let oh = make_one_hot(k, &indices); + let dense: Polynomial = to_dense(&oh); + + let sigma = 2; + let mut oh_rows: Vec> = Vec::new(); + oh.for_each_row(sigma, &mut |_, row: &[Fr]| oh_rows.push(row.to_vec())); + + let mut dense_rows: Vec> = Vec::new(); + dense.for_each_row(sigma, &mut |_, row: &[Fr]| dense_rows.push(row.to_vec())); + + assert_eq!(oh_rows, dense_rows); + } + + #[test] + fn for_each_nonzero_yields_correct_entries() { + let k = 4; + let indices = vec![Some(2), None, Some(0), Some(3)]; + let oh = make_one_hot(k, &indices); + + let mut entries = Vec::new(); + oh.for_each_nonzero(&mut |idx, val: Fr| entries.push((idx, val))); + + assert_eq!(entries.len(), 3); + // cycle 0, col 2; cycle 2, col 0; cycle 3, col 3 + assert_eq!(entries[0].0, 2); + assert_eq!(entries[1].0, 2 * 4); + assert_eq!(entries[2].0, 3 * 4 + 3); + assert!(entries.iter().all(|(_, v)| *v == Fr::from_u64(1))); + } + + #[test] + fn is_sparse_returns_true() { + let oh = make_one_hot(4, &[Some(0), Some(1), Some(2), Some(3)]); + assert!(MultilinearPoly::::is_sparse(&oh)); + } + + #[test] + fn all_none_evaluates_to_zero() { + let oh = make_one_hot(4, &[None, None, None, None]); + let mut rng = ChaCha20Rng::seed_from_u64(10); + let nv = >::num_vars(&oh); + let point: Vec = (0..nv).map(|_| Fr::random(&mut rng)).collect(); + let eval: Fr = oh.evaluate(&point); + assert!(eval.is_zero()); + } +} diff --git a/crates/jolt-poly/src/source.rs b/crates/jolt-poly/src/source.rs new file mode 100644 index 000000000..2556f7f92 --- /dev/null +++ b/crates/jolt-poly/src/source.rs @@ -0,0 +1,553 @@ +//! 
Abstract multilinear polynomial trait and compositions. +//! +//! [`MultilinearPoly`] is the core abstraction over multilinear polynomials +//! in evaluation form. Implementations range from dense evaluation tables +//! ([`Polynomial`](crate::Polynomial)) to structured sparse representations +//! ([`OneHotPolynomial`](crate::OneHotPolynomial)) to lazy compositions ([`RlcSource`]). The trait +//! decouples polynomial *access* from *storage*, enabling streaming opening +//! proofs where the full $2^n$ table never resides in memory simultaneously. +//! +//! [`RlcSource`] composes multiple polynomials via random linear combination +//! without materializing the combined table. Its [`fold_rows`](MultilinearPoly::fold_rows) +//! distributes across constituents, avoiding allocation of the combined table. + +use jolt_field::Field; + +use crate::Polynomial; + +/// A multilinear polynomial $f : \{0,1\}^n \to \mathbb{F}$ in evaluation form. +/// +/// The evaluation table can be viewed as a $(2^\nu \times 2^\sigma)$ matrix +/// where $\nu + \sigma = n$. Implementations range from dense evaluation +/// tables ([`Polynomial`](crate::Polynomial)) to structured sparse forms +/// ([`OneHotPolynomial`](crate::OneHotPolynomial)) to lazy compositions ([`RlcSource`]). +/// +/// Core operations: +/// - [`num_vars`](Self::num_vars) / [`evaluate`](Self::evaluate): metadata and point evaluation +/// - [`for_each_row`](Self::for_each_row): row-wise iteration (streaming commit, row-based MSM) +/// - [`fold_rows`](Self::fold_rows): matrix-vector product $v \cdot M$ (opening protocols) +/// - [`is_sparse`](Self::is_sparse) / [`for_each_nonzero`](Self::for_each_nonzero): sparsity +/// hints for PCS commit optimization (e.g., batch addition instead of MSM) +pub trait MultilinearPoly: Send + Sync { + /// Number of variables $n$. The polynomial has $2^n$ evaluations. + fn num_vars(&self) -> usize; + + /// Evaluates $f(r)$ at an arbitrary point $r \in \mathbb{F}^n$. 
+ fn evaluate(&self, point: &[F]) -> F; + + /// Iterates over the evaluation table in row-major order. + /// + /// The table is interpreted as a $(2^\nu \times 2^\sigma)$ matrix where + /// $\sigma$ is the number of column variables and $\nu = n - \sigma$. + /// The closure receives `(row_index, row_data)` pairs in order. + /// + /// For in-memory polynomials, rows are borrowed slices (zero-copy). + /// For lazy sources, each row may be computed on-the-fly. + fn for_each_row(&self, sigma: usize, f: &mut dyn FnMut(usize, &[F])); + + /// Folds a left vector against the $(2^\nu \times 2^\sigma)$ matrix form. + /// + /// Computes: + /// $$\text{result}\[c\] = \sum_{r=0}^{2^\nu - 1} \text{left}\[r\] \cdot M\[r\]\[c\]$$ + /// + /// where $M\[r\]\[c\] = f(\text{bits}(r \cdot 2^\sigma + c))$ and + /// $\nu = n - \sigma$. + /// + /// The default implementation iterates rows via [`for_each_row`](Self::for_each_row). + /// Implementations with distributable structure (e.g., [`RlcSource`]) or + /// sparse representations (e.g., one-hot polynomials) should override + /// for better performance. + /// + /// # Panics + /// + /// Panics if `left.len() != 2^(num_vars - sigma)`. + fn fold_rows(&self, left: &[F], sigma: usize) -> Vec { + let num_cols = 1usize << sigma; + let mut result = vec![F::zero(); num_cols]; + self.for_each_row(sigma, &mut |row_idx, row| { + let l = left[row_idx]; + for (r, &val) in result.iter_mut().zip(row.iter()) { + *r += l * val; + } + }); + result + } + + /// Whether this polynomial has sparse structure that allows more efficient + /// commitment (e.g., batch affine addition instead of full MSM). + /// + /// When true, PCS backends should use [`for_each_nonzero`](Self::for_each_nonzero) + /// to access only the nonzero entries. + fn is_sparse(&self) -> bool { + false + } + + /// Iterates over nonzero entries as `(flat_index, value)` pairs. + /// + /// For dense polynomials, the default scans the full table. 
Structured + /// sparse types (e.g., [`OneHotPolynomial`](crate::OneHotPolynomial)) yield only O(T) entries. + fn for_each_nonzero(&self, f: &mut dyn FnMut(usize, F)) { + let n = self.num_vars(); + let total = 1usize << n; + self.for_each_row(n, &mut |_, row| { + for (i, &val) in row.iter().take(total).enumerate() { + if !val.is_zero() { + f(i, val); + } + } + }); + } +} + +impl MultilinearPoly for Polynomial { + #[inline] + fn num_vars(&self) -> usize { + Polynomial::num_vars(self) + } + + fn evaluate(&self, point: &[F]) -> F { + Polynomial::evaluate(self, point) + } + + fn for_each_row(&self, sigma: usize, f: &mut dyn FnMut(usize, &[F])) { + let num_cols = 1usize << sigma; + for (i, row) in self.evaluations().chunks(num_cols).enumerate() { + f(i, row); + } + } + + fn fold_rows(&self, left: &[F], sigma: usize) -> Vec { + let num_cols = 1usize << sigma; + let evals = self.evaluations(); + debug_assert_eq!( + left.len(), + evals.len() / num_cols, + "left vector length must equal number of rows" + ); + + let mut result = vec![F::zero(); num_cols]; + for (row_idx, row) in evals.chunks(num_cols).enumerate() { + let l = left[row_idx]; + for (r, &val) in result.iter_mut().zip(row.iter()) { + *r += l * val; + } + } + result + } +} + +impl MultilinearPoly for [F] { + #[inline] + fn num_vars(&self) -> usize { + if self.is_empty() { + return 0; + } + assert!( + self.len().is_power_of_two(), + "slice length must be a power of two, got {}", + self.len() + ); + self.len().trailing_zeros() as usize + } + + fn evaluate(&self, point: &[F]) -> F { + let eq_evals = crate::EqPolynomial::new(point.to_vec()).evaluations(); + self.iter().zip(eq_evals.iter()).map(|(&f, &e)| f * e).sum() + } + + fn for_each_row(&self, sigma: usize, f: &mut dyn FnMut(usize, &[F])) { + let num_cols = 1usize << sigma; + for (i, row) in self.chunks(num_cols).enumerate() { + f(i, row); + } + } + + fn fold_rows(&self, left: &[F], sigma: usize) -> Vec { + let num_cols = 1usize << sigma; + let mut result = 
vec![F::zero(); num_cols]; + for (row_idx, row) in self.chunks(num_cols).enumerate() { + let l = left[row_idx]; + for (r, &val) in result.iter_mut().zip(row.iter()) { + *r += l * val; + } + } + result + } +} + +impl MultilinearPoly for Vec { + #[inline] + fn num_vars(&self) -> usize { + self.as_slice().num_vars() + } + + fn evaluate(&self, point: &[F]) -> F { + self.as_slice().evaluate(point) + } + + fn for_each_row(&self, sigma: usize, f: &mut dyn FnMut(usize, &[F])) { + self.as_slice().for_each_row(sigma, f); + } + + fn fold_rows(&self, left: &[F], sigma: usize) -> Vec { + self.as_slice().fold_rows(left, sigma) + } +} + +/// Lazy RLC composition of multilinear polynomials. +/// +/// Represents $f(x) = \sum_{i=0}^{k-1} s_i \cdot f_i(x)$ without +/// materializing the combined evaluation table. Operations distribute +/// over the constituents: +/// +/// - [`evaluate`](MultilinearPoly::evaluate): $\sum_i s_i \cdot f_i(r)$ +/// - [`fold_rows`](MultilinearPoly::fold_rows): $\sum_i s_i \cdot (v \cdot M_i)$ — +/// each polynomial computes its own fold, results are combined with scalars. +/// No evaluation table is ever materialized. +pub struct RlcSource> { + sources: Vec, + scalars: Vec, + num_vars: usize, +} + +impl> RlcSource { + /// Creates a lazy RLC composition. + /// + /// # Panics + /// + /// Panics if `sources` and `scalars` have different lengths, or if + /// sources have inconsistent `num_vars`. 
+ pub fn new(sources: Vec, scalars: Vec) -> Self { + assert_eq!(sources.len(), scalars.len()); + let num_vars = sources.first().map_or(0, |s| s.num_vars()); + debug_assert!( + sources.iter().all(|s| s.num_vars() == num_vars), + "all sources must have the same num_vars" + ); + Self { + sources, + scalars, + num_vars, + } + } + + pub fn sources(&self) -> &[S] { + &self.sources + } + + pub fn scalars(&self) -> &[F] { + &self.scalars + } +} + +impl> MultilinearPoly for RlcSource { + fn num_vars(&self) -> usize { + self.num_vars + } + + fn evaluate(&self, point: &[F]) -> F { + self.sources + .iter() + .zip(&self.scalars) + .map(|(source, &scalar)| scalar * source.evaluate(point)) + .fold(F::zero(), |acc, x| acc + x) + } + + /// Iterates over combined rows by collecting each source's rows and combining. + /// + /// Memory: O(k × 2^σ) where k = number of sources. + /// For streaming-critical paths, prefer [`fold_rows`](Self::fold_rows) which + /// distributes without materializing any rows. + fn for_each_row(&self, sigma: usize, f: &mut dyn FnMut(usize, &[F])) { + if self.sources.is_empty() { + return; + } + + let num_cols = 1usize << sigma; + let nu = self.num_vars.saturating_sub(sigma); + let num_rows = 1usize << nu; + + // Collect all rows from all sources. + // Each inner vec has num_rows entries, each of length num_cols. + let all_rows: Vec>> = self + .sources + .iter() + .map(|source| { + let mut rows = Vec::with_capacity(num_rows); + source.for_each_row(sigma, &mut |_idx, row| { + rows.push(row.to_vec()); + }); + rows + }) + .collect(); + + let mut combined = vec![F::zero(); num_cols]; + for row_idx in 0..num_rows { + combined.fill(F::zero()); + for (source_rows, &scalar) in all_rows.iter().zip(&self.scalars) { + for (dst, &val) in combined.iter_mut().zip(source_rows[row_idx].iter()) { + *dst += scalar * val; + } + } + f(row_idx, &combined); + } + } + + /// Distributes fold_rows across constituent sources. 
+ /// + /// Computes $\sum_i s_i \cdot (v \cdot M_i)$ by having each source + /// independently compute its own fold. No evaluation table is + /// ever materialized — this is the key streaming win. + fn fold_rows(&self, left: &[F], sigma: usize) -> Vec { + let num_cols = 1usize << sigma; + let mut result = vec![F::zero(); num_cols]; + for (source, &scalar) in self.sources.iter().zip(&self.scalars) { + let contribution = source.fold_rows(left, sigma); + for (r, &c) in result.iter_mut().zip(contribution.iter()) { + *r += scalar * c; + } + } + result + } +} + +#[cfg(test)] +mod tests { + use super::*; + use jolt_field::Fr; + use num_traits::Zero; + use rand_chacha::ChaCha20Rng; + use rand_core::SeedableRng; + + #[test] + fn polynomial_for_each_row_matches_chunks() { + let mut rng = ChaCha20Rng::seed_from_u64(1); + let poly = Polynomial::::random(4, &mut rng); + let sigma = 2; + let num_cols = 1usize << sigma; + + let mut rows = Vec::new(); + poly.for_each_row(sigma, &mut |_idx, row| { + rows.push(row.to_vec()); + }); + + assert_eq!(rows.len(), poly.len() / num_cols); + for (i, row) in rows.iter().enumerate() { + let start = i * num_cols; + assert_eq!(row.as_slice(), &poly.evaluations()[start..start + num_cols]); + } + } + + #[test] + fn polynomial_fold_rows_matches_manual_vmp() { + let mut rng = ChaCha20Rng::seed_from_u64(2); + let num_vars = 4; + let sigma = 2; + let nu = num_vars - sigma; + let num_cols = 1usize << sigma; + let num_rows = 1usize << nu; + + let poly = Polynomial::::random(num_vars, &mut rng); + let left: Vec = (0..num_rows).map(|_| Fr::random(&mut rng)).collect(); + + let result = poly.fold_rows(&left, sigma); + + // Manual VMP + let mut expected = vec![Fr::zero(); num_cols]; + for (row, &l) in left.iter().enumerate() { + for (col, dest) in expected.iter_mut().enumerate() { + *dest += l * poly.evaluations()[row * num_cols + col]; + } + } + + assert_eq!(result, expected); + } + + #[test] + fn rlc_source_evaluate_matches_manual() { + let mut rng = 
ChaCha20Rng::seed_from_u64(10); + let num_vars = 3; + + let p1 = Polynomial::::random(num_vars, &mut rng); + let p2 = Polynomial::::random(num_vars, &mut rng); + let s1 = Fr::random(&mut rng); + let s2 = Fr::random(&mut rng); + + let point: Vec = (0..num_vars).map(|_| Fr::random(&mut rng)).collect(); + + let rlc = RlcSource::new(vec![p1.clone(), p2.clone()], vec![s1, s2]); + let result = rlc.evaluate(&point); + let expected = s1 * p1.evaluate(&point) + s2 * p2.evaluate(&point); + + assert_eq!(result, expected); + } + + #[test] + fn rlc_source_fold_rows_matches_materialized() { + let mut rng = ChaCha20Rng::seed_from_u64(20); + let num_vars = 4; + let sigma = 2; + let nu = num_vars - sigma; + let num_rows = 1usize << nu; + + let p1 = Polynomial::::random(num_vars, &mut rng); + let p2 = Polynomial::::random(num_vars, &mut rng); + let s1 = Fr::random(&mut rng); + let s2 = Fr::random(&mut rng); + let left: Vec = (0..num_rows).map(|_| Fr::random(&mut rng)).collect(); + + // Lazy fold + let rlc = RlcSource::new(vec![p1.clone(), p2.clone()], vec![s1, s2]); + let lazy_result = rlc.fold_rows(&left, sigma); + + // Materialized fold + let combined_evals: Vec = p1 + .evaluations() + .iter() + .zip(p2.evaluations().iter()) + .map(|(&a, &b)| s1 * a + s2 * b) + .collect(); + let combined = Polynomial::new(combined_evals); + let materialized_result = combined.fold_rows(&left, sigma); + + assert_eq!(lazy_result, materialized_result); + } + + #[test] + fn rlc_source_for_each_row_matches_materialized() { + let mut rng = ChaCha20Rng::seed_from_u64(30); + let num_vars = 3; + let sigma = 1; + + let p1 = Polynomial::::random(num_vars, &mut rng); + let p2 = Polynomial::::random(num_vars, &mut rng); + let s1 = Fr::random(&mut rng); + let s2 = Fr::random(&mut rng); + + let rlc = RlcSource::new(vec![p1.clone(), p2.clone()], vec![s1, s2]); + + let mut lazy_rows = Vec::new(); + rlc.for_each_row(sigma, &mut |_idx, row| { + lazy_rows.push(row.to_vec()); + }); + + let combined_evals: Vec = p1 + 
.evaluations() + .iter() + .zip(p2.evaluations().iter()) + .map(|(&a, &b)| s1 * a + s2 * b) + .collect(); + let combined = Polynomial::new(combined_evals); + let mut materialized_rows = Vec::new(); + combined.for_each_row(sigma, &mut |_idx, row| { + materialized_rows.push(row.to_vec()); + }); + + assert_eq!(lazy_rows, materialized_rows); + } + + #[test] + fn rlc_source_fold_equals_evaluate_at_point() { + use crate::eq::EqPolynomial; + + let mut rng = ChaCha20Rng::seed_from_u64(40); + let num_vars = 4; + let sigma = 2; + let nu = num_vars - sigma; + + let p1 = Polynomial::::random(num_vars, &mut rng); + let p2 = Polynomial::::random(num_vars, &mut rng); + let p3 = Polynomial::::random(num_vars, &mut rng); + let s1 = Fr::random(&mut rng); + let s2 = Fr::random(&mut rng); + let s3 = Fr::random(&mut rng); + + let point: Vec = (0..num_vars).map(|_| Fr::random(&mut rng)).collect(); + + // Split point into row-point (first nu vars) and col-point (last sigma vars) + let row_point = &point[..nu]; + let col_point = &point[nu..]; + + let rlc = RlcSource::new(vec![p1.clone(), p2.clone(), p3.clone()], vec![s1, s2, s3]); + + // fold_rows with eq(row_point) as left vector, then dot with eq(col_point) + let eq_rows = EqPolynomial::new(row_point.to_vec()).evaluations(); + let folded = rlc.fold_rows(&eq_rows, sigma); + let eq_cols = EqPolynomial::new(col_point.to_vec()).evaluations(); + let via_fold: Fr = folded + .iter() + .zip(eq_cols.iter()) + .map(|(&a, &b)| a * b) + .sum(); + + // Direct evaluation + let via_eval = rlc.evaluate(&point); + + assert_eq!(via_fold, via_eval); + } + + #[test] + fn default_fold_rows_matches_override() { + let mut rng = ChaCha20Rng::seed_from_u64(50); + let num_vars = 4; + let sigma = 2; + let nu = num_vars - sigma; + let num_rows = 1usize << nu; + + let poly = Polynomial::::random(num_vars, &mut rng); + let left: Vec = (0..num_rows).map(|_| Fr::random(&mut rng)).collect(); + + // Use the default impl (via for_each_row) + let default_result = 
default_fold_rows(&poly, &left, sigma); + + // Use the overridden impl + let override_result = poly.fold_rows(&left, sigma); + + assert_eq!(default_result, override_result); + } + + /// Calls the default `fold_rows` implementation (via `for_each_row`). + fn default_fold_rows( + source: &impl MultilinearPoly, + left: &[F], + sigma: usize, + ) -> Vec { + let num_cols = 1usize << sigma; + let mut result = vec![F::zero(); num_cols]; + source.for_each_row(sigma, &mut |row_idx, row| { + let l = left[row_idx]; + for (r, &val) in result.iter_mut().zip(row.iter()) { + *r += l * val; + } + }); + result + } + + #[test] + fn empty_rlc_source() { + let rlc: RlcSource> = RlcSource::new(vec![], vec![]); + assert_eq!(rlc.num_vars(), 0); + } + + #[test] + fn single_source_rlc_is_scaled_original() { + let mut rng = ChaCha20Rng::seed_from_u64(60); + let num_vars = 3; + let sigma = 1; + let nu = num_vars - sigma; + let num_rows = 1usize << nu; + + let poly = Polynomial::::random(num_vars, &mut rng); + let scalar = Fr::random(&mut rng); + let left: Vec = (0..num_rows).map(|_| Fr::random(&mut rng)).collect(); + + let rlc = RlcSource::new(vec![poly.clone()], vec![scalar]); + let rlc_result = rlc.fold_rows(&left, sigma); + + // Manually scale the polynomial fold + let direct_result = poly.fold_rows(&left, sigma); + let scaled: Vec = direct_result.iter().map(|&v| scalar * v).collect(); + + assert_eq!(rlc_result, scaled); + } +} diff --git a/crates/jolt-poly/src/thread.rs b/crates/jolt-poly/src/thread.rs new file mode 100644 index 000000000..d0813b378 --- /dev/null +++ b/crates/jolt-poly/src/thread.rs @@ -0,0 +1,65 @@ +//! Threading utilities for polynomial operations. + +use num_traits::Zero; + +/// Drops `data` in a background rayon task to avoid blocking the caller. +#[cfg(feature = "parallel")] +pub fn drop_in_background_thread(data: T) { + rayon::spawn(move || drop(data)); +} + +/// Allocates a zeroed `Vec` of `size` elements using `alloc_zeroed`. 
+/// +/// # Safety contract +/// +/// The caller must ensure that `T::zero()` is represented as all-zero bytes. +/// This is verified in debug/test builds via an assertion. +#[allow(clippy::all)] +pub fn unsafe_allocate_zero_vec(size: usize) -> Vec { + #[cfg(test)] + { + // SAFETY: We read the zero representation as raw bytes to verify the + // all-zeros invariant that `alloc_zeroed` relies on. + unsafe { + let value = &T::zero(); + let ptr = std::ptr::from_ref::(value).cast::(); + let bytes = std::slice::from_raw_parts(ptr, std::mem::size_of::()); + assert!( + bytes.iter().all(|&byte| byte == 0), + "T::zero() is not all-zero bytes — unsafe_allocate_zero_vec is invalid for this type" + ); + } + } + + // SAFETY: `alloc_zeroed` produces a valid zero-initialized allocation. + // The caller guarantees that `T::zero()` is all-zero bytes, so the + // resulting `Vec` contains valid `T` values. + unsafe { + let layout = std::alloc::Layout::array::(size).unwrap(); + let ptr = std::alloc::alloc_zeroed(layout).cast::(); + if ptr.is_null() { + std::alloc::handle_alloc_error(layout); + } + #[allow(clippy::same_length_and_capacity)] + Vec::from_raw_parts(ptr, size, size) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn zero_vec_u64() { + let v: Vec = unsafe_allocate_zero_vec(1024); + assert_eq!(v.len(), 1024); + assert!(v.iter().all(|&x| x == 0)); + } + + #[test] + fn zero_vec_f64() { + let v: Vec = unsafe_allocate_zero_vec(256); + assert_eq!(v.len(), 256); + assert!(v.iter().all(|&x| x == 0.0)); + } +} diff --git a/crates/jolt-poly/src/univariate.rs b/crates/jolt-poly/src/univariate.rs new file mode 100644 index 000000000..431993558 --- /dev/null +++ b/crates/jolt-poly/src/univariate.rs @@ -0,0 +1,881 @@ +//! Univariate polynomial in coefficient form. + +use std::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign}; + +use jolt_field::Field; +use serde::{Deserialize, Serialize}; + +/// Shared interface for univariate polynomial types. 
///
/// Provides the minimal common API between [`UnivariatePoly`] (full coefficient
/// form) and [`CompressedPoly`](crate::CompressedPoly) (linear term omitted). Evaluation and coefficient
/// access are deliberately left as inherent methods because the two representations
/// require different calling conventions (compressed evaluation needs an external
/// hint value).
// NOTE(review): generic parameters on this trait (if any) were lost in
// extraction; reconstructed here as the minimal non-generic form, since
// `degree` does not mention the field — confirm against the original.
pub trait UnivariatePolynomial: Send + Sync {
    /// Degree of the polynomial, or 0 for the zero polynomial.
    fn degree(&self) -> usize;
}

/// Univariate polynomial in coefficient form: $p(x) = \sum_{i=0}^{d} c_i x^i$.
///
/// Coefficients are stored in ascending degree order: `coefficients[i]` is the
/// coefficient of $x^i$. An empty coefficient vector represents the zero polynomial.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(bound = "")]
pub struct UnivariatePoly<F: Field> {
    coefficients: Vec<F>,
}

impl<F: Field> UnivariatePolynomial for UnivariatePoly<F> {
    fn degree(&self) -> usize {
        if self.coefficients.is_empty() {
            0
        } else {
            self.coefficients.len() - 1
        }
    }
}

impl<F: Field> UnivariatePoly<F> {
    /// Creates a polynomial from coefficients in ascending degree order.
    pub fn new(coefficients: Vec<F>) -> Self {
        Self { coefficients }
    }

    /// The zero polynomial (empty coefficient vector).
    pub fn zero() -> Self {
        Self {
            coefficients: Vec::new(),
        }
    }

    /// Evaluates $p(\text{point})$ using Horner's method.
    ///
    /// Computes $c_d + x(c_{d-1} + x(c_{d-2} + \cdots))$ in $O(d)$ multiplications.
    #[inline]
    pub fn evaluate(&self, point: F) -> F {
        if self.coefficients.is_empty() {
            return F::zero();
        }
        self.coefficients
            .iter()
            .rev()
            .copied()
            .reduce(|acc, c| acc * point + c)
            .unwrap()
    }

    /// Lagrange interpolation from a set of $(x_i, y_i)$ pairs.
    ///
    /// Given $n$ distinct points, produces the unique polynomial of degree $\le n-1$
    /// passing through all of them using the standard $O(n^2)$ algorithm.
    ///
    /// # Panics
    /// Panics if `points` is empty or if two x-coordinates coincide.
    pub fn interpolate(points: &[(F, F)]) -> Self {
        assert!(!points.is_empty(), "cannot interpolate zero points");

        let n = points.len();
        let mut result = vec![F::zero(); n];

        for j in 0..n {
            // Build the j-th Lagrange basis polynomial in coefficient form.
            let mut basis = vec![F::zero(); n];
            basis[0] = F::one();

            let mut basis_len = 1;
            for m in 0..n {
                if m == j {
                    continue;
                }
                let denom = (points[j].0 - points[m].0)
                    .inverse()
                    .expect("interpolation points must be distinct");
                let neg_xm = -points[m].0;

                // Multiply polynomial by (x - x_m): shift up and add
                for k in (1..=basis_len).rev() {
                    basis[k] = basis[k - 1] + basis[k] * neg_xm;
                }
                basis[0] *= neg_xm;
                basis_len += 1;

                // Fold the denominator in incrementally so the basis stays normalized.
                for coeff in basis.iter_mut().take(basis_len) {
                    *coeff *= denom;
                }
            }

            // Accumulate y_j * L_j(x) into the result.
            for k in 0..n {
                result[k] += points[j].1 * basis[k];
            }
        }

        Self {
            coefficients: result,
        }
    }

    /// Coefficients in ascending degree order: index $i$ holds the coefficient of $x^i$.
    pub fn coefficients(&self) -> &[F] {
        &self.coefficients
    }

    /// Consumes the polynomial and returns the coefficient vector.
    pub fn into_coefficients(self) -> Vec<F> {
        self.coefficients
    }

    /// Evaluates the $i$-th Lagrange basis polynomial at `point` over the domain
    /// $\{0, 1, \ldots, n-1\}$:
    /// $$L_i(x) = \prod_{\substack{j=0 \\ j \neq i}}^{n-1} \frac{x - j}{i - j}$$
    ///
    /// # Panics
    /// Panics if `index >= domain_size`.
    pub fn evaluate_basis(domain_size: usize, index: usize, point: F) -> F {
        assert!(
            index < domain_size,
            "index {index} out of domain of size {domain_size}"
        );

        // Hoisted out of the loop: `i_f` is loop-invariant.
        let i_f = F::from_u64(index as u64);
        let mut numer = F::one();
        let mut denom = F::one();

        for j in 0..domain_size {
            if j == index {
                continue;
            }
            let j_f = F::from_u64(j as u64);
            numer *= point - j_f;
            denom *= i_f - j_f;
        }

        numer * denom.inverse().expect("Lagrange denominator is zero")
    }

    /// Interpolates a polynomial from evaluations at $\{0, 1, \ldots, n-1\}$.
+ /// + /// Given evaluations $[f(0), f(1), \ldots, f(n-1)]$, returns the unique polynomial + /// of degree $\le n-1$ matching those values. This is a convenience wrapper around + /// [`interpolate`](Self::interpolate) for the common integer-domain case. + /// + /// # Panics + /// Panics if `evals` is empty. + pub fn interpolate_over_integers(evals: &[F]) -> Self { + assert!(!evals.is_empty(), "cannot interpolate zero evaluations"); + let points: Vec<(F, F)> = evals + .iter() + .enumerate() + .map(|(i, &y)| (F::from_u64(i as u64), y)) + .collect(); + Self::interpolate(&points) + } + + /// Compresses the polynomial by omitting the linear term. + /// + /// The resulting [`CompressedPoly`](crate::CompressedPoly) stores + /// `[c0, c2, c3, ...]`, saving one field element in proof serialization. + /// The linear term can be recovered given the hint value `f(0) + f(1)`. + /// + /// # Panics + /// Panics if the polynomial has degree < 1 (no linear term to omit). + pub fn compress(&self) -> crate::CompressedPoly { + assert!( + self.coefficients.len() >= 2, + "cannot compress a polynomial of degree < 1" + ); + let coeffs = [&self.coefficients[..1], &self.coefficients[2..]].concat(); + debug_assert_eq!(coeffs.len() + 1, self.coefficients.len()); + crate::CompressedPoly::new(coeffs) + } + + /// Interpolates from evaluations at `0, 1, 2, ..., n-1` using Gaussian elimination + /// on the Vandermonde system. Equivalent to `interpolate_over_integers` but uses a + /// direct matrix solve instead of the Lagrange formula. + pub fn from_evals(evals: &[F]) -> Self { + Self { + coefficients: gaussian_elimination_vandermonde(evals), + } + } + + /// Interpolates from evaluations at `[0, 2, 3, ..., n-1]` with the hint `p(0) + p(1)`. + /// + /// Recovers `p(1) = hint - p(0)` and then interpolates over the full set `{0, 1, ..., n-1}`. 
+ pub fn from_evals_and_hint(hint: F, evals: &[F]) -> Self { + let mut full = evals.to_vec(); + let eval_at_1 = hint - full[0]; + full.insert(1, eval_at_1); + Self::from_evals(&full) + } + + /// Interpolates from evaluations at `[0, 1, ..., degree-1, ∞]`. + /// + /// The last entry is interpreted as the evaluation at infinity, i.e., the leading + /// coefficient of the polynomial. Uses Gaussian elimination on the augmented + /// Vandermonde-plus-infinity system. + pub fn from_evals_toom(evals: &[F]) -> Self { + let n = evals.len(); + let mut matrix: Vec> = Vec::with_capacity(n); + + // Rows for finite x values: x = 0, 1, ..., n-2 + for (i, &eval) in evals[..n - 1].iter().enumerate() { + let mut row = Vec::with_capacity(n + 1); + let x = F::from_u64(i as u64); + let mut power = F::one(); + for _ in 0..n { + row.push(power); + power *= x; + } + row.push(eval); + matrix.push(row); + } + + // Row for x = ∞: only the leading coefficient survives + let mut row = vec![F::zero(); n]; + row[n - 1] = F::one(); + row.push(evals[n - 1]); + matrix.push(row); + + Self { + coefficients: gaussian_elimination_augmented(&mut matrix), + } + } + + /// Computes the cubic polynomial `s(X) = l(X) * q(X)`, where `l(X)` is linear + /// and `q(X)` is quadratic, given partial information and a hint. + /// + /// - `linear_coeffs = [l(0), l(∞)]` (constant and leading coefficient) + /// - `quadratic_coeff_0 = q(0)` (constant) + /// - `quadratic_coeff_2 = q(∞)` (quadratic coefficient, i.e., leading coeff) + /// - `hint = s(0) + s(1)` + /// + /// Used by the split-eq evaluator to construct round polynomials. 
+ pub fn from_linear_times_quadratic_with_hint( + linear_coeffs: [F; 2], + quadratic_coeff_0: F, + quadratic_coeff_2: F, + hint: F, + ) -> Self { + let linear_eval_one = linear_coeffs[0] + linear_coeffs[1]; + let cubic_coeff_0 = linear_coeffs[0] * quadratic_coeff_0; + + // s(0) + s(1) = l(0)*q(0) + l(1)*q(1) = hint + // l(1) = l(0) + l(∞) = linear_eval_one + // q(1) = q(0) + q(1_coeff) + q(2_coeff) + // Solve for the linear coefficient of q: + let quadratic_coeff_1 = + (hint - cubic_coeff_0) / linear_eval_one - quadratic_coeff_0 - quadratic_coeff_2; + + // s(X) = (a + bX)(c + dX + eX^2) = ac + (ad+bc)X + (ae+bd)X^2 + beX^3 + let coefficients = vec![ + cubic_coeff_0, + linear_coeffs[0] * quadratic_coeff_1 + linear_coeffs[1] * quadratic_coeff_0, + linear_coeffs[0] * quadratic_coeff_2 + linear_coeffs[1] * quadratic_coeff_1, + linear_coeffs[1] * quadratic_coeff_2, + ]; + Self { coefficients } + } + + /// Returns `true` if all coefficients are zero (or the vector is empty). + pub fn is_zero(&self) -> bool { + self.coefficients.is_empty() || self.coefficients.iter().all(|c| *c == F::zero()) + } + + /// The leading (highest-degree) coefficient, or `None` for the zero polynomial. + pub fn leading_coefficient(&self) -> Option<&F> { + self.coefficients.last() + } + + /// Polynomial long division: `self = quotient * divisor + remainder`. + /// + /// Returns `Some((quotient, remainder))`, or `None` if `divisor` is the + /// zero polynomial. 
+ pub fn divide_with_remainder(&self, divisor: &Self) -> Option<(Self, Self)> { + if self.is_zero() { + return Some((Self::zero(), Self::zero())); + } + if divisor.is_zero() { + return None; + } + if self.coefficients.len() < divisor.coefficients.len() { + return Some((Self::zero(), self.clone())); + } + + let divisor_leading_inv = divisor + .leading_coefficient() + .unwrap() + .inverse() + .expect("leading coefficient must be invertible"); + + let mut remainder = self.clone(); + let mut quotient = + vec![F::zero(); self.coefficients.len() - divisor.coefficients.len() + 1]; + + while !remainder.is_zero() && remainder.coefficients.len() >= divisor.coefficients.len() { + let cur_q_coeff = *remainder.leading_coefficient().unwrap() * divisor_leading_inv; + let cur_q_degree = remainder.coefficients.len() - divisor.coefficients.len(); + quotient[cur_q_degree] = cur_q_coeff; + + for (i, div_coeff) in divisor.coefficients.iter().enumerate() { + remainder.coefficients[cur_q_degree + i] -= cur_q_coeff * *div_coeff; + } + + // Strip trailing zeros + while remainder + .coefficients + .last() + .is_some_and(|c| *c == F::zero()) + { + let _ = remainder.coefficients.pop(); + } + } + + Some((Self::new(quotient), remainder)) + } +} + +impl Neg for UnivariatePoly { + type Output = Self; + + fn neg(mut self) -> Self { + for c in &mut self.coefficients { + *c = -*c; + } + self + } +} + +impl Add for UnivariatePoly { + type Output = Self; + + fn add(mut self, rhs: Self) -> Self { + self += &rhs; + self + } +} + +impl Add for &UnivariatePoly { + type Output = UnivariatePoly; + + fn add(self, rhs: Self) -> UnivariatePoly { + let (longer, shorter) = if self.coefficients.len() >= rhs.coefficients.len() { + (&self.coefficients, &rhs.coefficients) + } else { + (&rhs.coefficients, &self.coefficients) + }; + let mut coeffs = longer.clone(); + for (a, b) in coeffs.iter_mut().zip(shorter) { + *a += *b; + } + UnivariatePoly::new(coeffs) + } +} + +impl AddAssign<&Self> for UnivariatePoly { + fn 
add_assign(&mut self, rhs: &Self) { + if rhs.coefficients.len() > self.coefficients.len() { + self.coefficients.resize(rhs.coefficients.len(), F::zero()); + } + for (a, b) in self.coefficients.iter_mut().zip(&rhs.coefficients) { + *a += *b; + } + } +} + +impl Sub for UnivariatePoly { + type Output = Self; + + fn sub(mut self, rhs: Self) -> Self { + self -= &rhs; + self + } +} + +impl Sub for &UnivariatePoly { + type Output = UnivariatePoly; + + fn sub(self, rhs: Self) -> UnivariatePoly { + let max_len = self.coefficients.len().max(rhs.coefficients.len()); + let mut coeffs = vec![F::zero(); max_len]; + for (i, c) in self.coefficients.iter().enumerate() { + coeffs[i] += *c; + } + for (i, c) in rhs.coefficients.iter().enumerate() { + coeffs[i] -= *c; + } + UnivariatePoly::new(coeffs) + } +} + +impl SubAssign<&Self> for UnivariatePoly { + fn sub_assign(&mut self, rhs: &Self) { + if rhs.coefficients.len() > self.coefficients.len() { + self.coefficients.resize(rhs.coefficients.len(), F::zero()); + } + for (a, b) in self.coefficients.iter_mut().zip(&rhs.coefficients) { + *a -= *b; + } + } +} + +impl Mul for UnivariatePoly { + type Output = Self; + + fn mul(mut self, rhs: F) -> Self { + self *= rhs; + self + } +} + +impl Mul for &UnivariatePoly { + type Output = UnivariatePoly; + + fn mul(self, rhs: F) -> UnivariatePoly { + UnivariatePoly::new(self.coefficients.iter().map(|c| *c * rhs).collect()) + } +} + +impl MulAssign for UnivariatePoly { + fn mul_assign(&mut self, rhs: F) { + for c in &mut self.coefficients { + *c *= rhs; + } + } +} + +/// Gaussian elimination on a Vandermonde system for evaluations at `0, 1, ..., n-1`. 
+fn gaussian_elimination_vandermonde(evals: &[F]) -> Vec { + let n = evals.len(); + let xs: Vec = (0..n).map(|x| F::from_u64(x as u64)).collect(); + + let mut matrix: Vec> = Vec::with_capacity(n); + for i in 0..n { + let mut row = Vec::with_capacity(n + 1); + let x = xs[i]; + let mut power = F::one(); + for _ in 0..n { + row.push(power); + power *= x; + } + row.push(evals[i]); + matrix.push(row); + } + + gaussian_elimination_augmented(&mut matrix) +} + +/// Gaussian elimination on an augmented matrix `[A | b]` where `A` is `n × n`. +/// +/// Returns the solution vector `x` such that `A x = b`. +fn gaussian_elimination_augmented(matrix: &mut [Vec]) -> Vec { + let size = matrix.len(); + debug_assert_eq!(size, matrix[0].len() - 1); + + // Forward elimination (row echelon form) + for i in 0..size.saturating_sub(1) { + for j in i..size - 1 { + if matrix[i][i] != F::zero() { + let factor = matrix[j + 1][i] / matrix[i][i]; + #[allow(clippy::needless_range_loop)] + for k in i..=size { + let tmp = matrix[i][k]; + matrix[j + 1][k] -= factor * tmp; + } + } + } + } + + // Back substitution + for i in (1..size).rev() { + if matrix[i][i] != F::zero() { + for j in (1..=i).rev() { + let factor = matrix[j - 1][i] / matrix[i][i]; + for k in (0..=size).rev() { + let tmp = matrix[i][k]; + matrix[j - 1][k] -= factor * tmp; + } + } + } + } + + let mut result = vec![F::zero(); size]; + for i in 0..size { + result[i] = matrix[i][size] / matrix[i][i]; + } + result +} + +#[cfg(test)] +mod tests { + use super::*; + use jolt_field::Field; + use jolt_field::Fr; + use num_traits::{One, Zero}; + + #[test] + fn horner_known_polynomial() { + // p(x) = 3 + 2x + x^2 + let p = UnivariatePoly::new(vec![Fr::from_u64(3), Fr::from_u64(2), Fr::from_u64(1)]); + assert_eq!(p.evaluate(Fr::from_u64(0)), Fr::from_u64(3)); + assert_eq!(p.evaluate(Fr::from_u64(1)), Fr::from_u64(6)); + assert_eq!(p.evaluate(Fr::from_u64(2)), Fr::from_u64(11)); + } + + #[test] + fn interpolate_round_trip() { + let points = vec![ + 
(Fr::from_u64(0), Fr::from_u64(1)), + (Fr::from_u64(1), Fr::from_u64(4)), + (Fr::from_u64(2), Fr::from_u64(11)), + ]; + let p = UnivariatePoly::interpolate(&points); + + for &(x, y) in &points { + assert_eq!(p.evaluate(x), y); + } + } + + #[test] + fn degree_is_correct() { + let p = UnivariatePoly::::zero(); + assert_eq!(p.degree(), 0); + + let p = UnivariatePoly::new(vec![Fr::from_u64(5)]); + assert_eq!(p.degree(), 0); + + let p = UnivariatePoly::new(vec![Fr::from_u64(1), Fr::from_u64(1), Fr::from_u64(1)]); + assert_eq!(p.degree(), 2); + } + + #[test] + fn zero_polynomial_evaluates_to_zero() { + let p = UnivariatePoly::::zero(); + assert!(p.evaluate(Fr::from_u64(42)).is_zero()); + } + + #[test] + fn interpolate_linear() { + // (0, 1), (1, 3) -> p(x) = 1 + 2x + let points = vec![ + (Fr::from_u64(0), Fr::from_u64(1)), + (Fr::from_u64(1), Fr::from_u64(3)), + ]; + let p = UnivariatePoly::interpolate(&points); + assert_eq!(p.evaluate(Fr::from_u64(5)), Fr::from_u64(11)); + } + + #[test] + fn serde_round_trip() { + let p = UnivariatePoly::new(vec![Fr::from_u64(3), Fr::from_u64(2), Fr::from_u64(1)]); + let bytes = bincode::serde::encode_to_vec(&p, bincode::config::standard()).unwrap(); + let recovered: UnivariatePoly = + bincode::serde::decode_from_slice(&bytes, bincode::config::standard()) + .unwrap() + .0; + assert_eq!(p, recovered); + } + + #[test] + fn serde_round_trip_zero() { + let p = UnivariatePoly::::zero(); + let bytes = bincode::serde::encode_to_vec(&p, bincode::config::standard()).unwrap(); + let recovered: UnivariatePoly = + bincode::serde::decode_from_slice(&bytes, bincode::config::standard()) + .unwrap() + .0; + assert_eq!(p, recovered); + } + + #[test] + fn basis_is_one_at_own_index() { + let n = 5; + for i in 0..n { + let val = UnivariatePoly::::evaluate_basis(n, i, Fr::from_u64(i as u64)); + assert_eq!(val, Fr::one(), "L_{i}({i}) should be 1"); + } + } + + #[test] + fn basis_is_zero_at_other_indices() { + let n = 5; + for i in 0..n { + for j in 0..n { + 
if i == j { + continue; + } + let val = UnivariatePoly::::evaluate_basis(n, i, Fr::from_u64(j as u64)); + assert!(val.is_zero(), "L_{i}({j}) should be 0"); + } + } + } + + #[test] + fn interpolate_over_integers_matches_evaluations() { + let evals: Vec = vec![ + Fr::from_u64(7), + Fr::from_u64(3), + Fr::from_u64(11), + Fr::from_u64(2), + ]; + let poly = UnivariatePoly::interpolate_over_integers(&evals); + + for (i, &expected) in evals.iter().enumerate() { + let x = Fr::from_u64(i as u64); + assert_eq!(poly.evaluate(x), expected, "mismatch at x={i}"); + } + } + + #[test] + fn interpolate_over_integers_constant() { + let c = Fr::from_u64(42); + let evals = vec![c; 4]; + let poly = UnivariatePoly::interpolate_over_integers(&evals); + assert_eq!(poly.evaluate(Fr::from_u64(100)), c); + } + + #[test] + fn interpolate_single_point_constant() { + let c = Fr::from_u64(7); + let poly = UnivariatePoly::interpolate(&[(Fr::from_u64(0), c)]); + // Degree-0 polynomial: evaluates to c everywhere + assert_eq!(poly.evaluate(Fr::from_u64(0)), c); + assert_eq!(poly.evaluate(Fr::from_u64(99)), c); + assert_eq!(poly.degree(), 0); + } + + #[test] + fn compress_then_evaluate_with_hint() { + // p(x) = 1 + 3x + 2x^2 => p(0)=1, p(1)=6 + let p = UnivariatePoly::new(vec![Fr::from_u64(1), Fr::from_u64(3), Fr::from_u64(2)]); + let hint = p.evaluate(Fr::zero()) + p.evaluate(Fr::one()); + + let compressed = p.compress(); + let x = Fr::from_u64(5); + assert_eq!(compressed.evaluate_with_hint(hint, x), p.evaluate(x)); + } + + #[test] + fn add_polynomials() { + // (1 + 2x) + (3 + x + 5x^2) = 4 + 3x + 5x^2 + let a = UnivariatePoly::new(vec![Fr::from_u64(1), Fr::from_u64(2)]); + let b = UnivariatePoly::new(vec![Fr::from_u64(3), Fr::from_u64(1), Fr::from_u64(5)]); + let sum = &a + &b; + assert_eq!( + sum, + UnivariatePoly::new(vec![Fr::from_u64(4), Fr::from_u64(3), Fr::from_u64(5)]) + ); + } + + #[test] + fn add_assign_extends_shorter() { + let mut a = UnivariatePoly::new(vec![Fr::from_u64(1)]); + let b = 
UnivariatePoly::new(vec![Fr::from_u64(2), Fr::from_u64(3), Fr::from_u64(4)]); + a += &b; + assert_eq!( + a, + UnivariatePoly::new(vec![Fr::from_u64(3), Fr::from_u64(3), Fr::from_u64(4)]) + ); + } + + #[test] + fn sub_polynomials() { + let a = UnivariatePoly::new(vec![Fr::from_u64(5), Fr::from_u64(3)]); + let b = UnivariatePoly::new(vec![Fr::from_u64(2), Fr::from_u64(1)]); + let diff = a - b; + assert_eq!( + diff, + UnivariatePoly::new(vec![Fr::from_u64(3), Fr::from_u64(2)]) + ); + } + + #[test] + fn neg_polynomial() { + let p = UnivariatePoly::new(vec![Fr::from_u64(1), Fr::from_u64(2)]); + let neg_p = -p.clone(); + let sum = p + neg_p; + assert!(sum.is_zero()); + } + + #[test] + fn scalar_mul() { + // (1 + 2x) * 3 = 3 + 6x + let p = UnivariatePoly::new(vec![Fr::from_u64(1), Fr::from_u64(2)]); + let scaled = &p * Fr::from_u64(3); + assert_eq!( + scaled, + UnivariatePoly::new(vec![Fr::from_u64(3), Fr::from_u64(6)]) + ); + } + + #[test] + fn scalar_mul_assign() { + let mut p = UnivariatePoly::new(vec![Fr::from_u64(2), Fr::from_u64(4)]); + p *= Fr::from_u64(5); + assert_eq!( + p, + UnivariatePoly::new(vec![Fr::from_u64(10), Fr::from_u64(20)]) + ); + } + + #[test] + fn add_then_scalar_mul_pattern() { + // Mimics sumcheck batching: batched += &(round_poly * coeff) + let mut batched = UnivariatePoly::::zero(); + let poly_a = UnivariatePoly::new(vec![Fr::from_u64(1), Fr::from_u64(2), Fr::from_u64(3)]); + let poly_b = UnivariatePoly::new(vec![Fr::from_u64(4), Fr::from_u64(5), Fr::from_u64(6)]); + let coeff_a = Fr::from_u64(2); + let coeff_b = Fr::from_u64(3); + + batched += &(&poly_a * coeff_a); + batched += &(&poly_b * coeff_b); + + // 2*(1+2x+3x^2) + 3*(4+5x+6x^2) = (2+12) + (4+15)x + (6+18)x^2 + for x_val in 0..5u64 { + let x = Fr::from_u64(x_val); + let expected = poly_a.evaluate(x) * coeff_a + poly_b.evaluate(x) * coeff_b; + assert_eq!(batched.evaluate(x), expected, "mismatch at x={x_val}"); + } + } + + #[test] + fn divide_exact() { + // (x^2 - 1) / (x - 1) = (x + 1), 
remainder 0 + let dividend = UnivariatePoly::new(vec![-Fr::one(), Fr::zero(), Fr::one()]); + let divisor = UnivariatePoly::new(vec![-Fr::one(), Fr::one()]); + let (q, r) = dividend.divide_with_remainder(&divisor).unwrap(); + assert_eq!(q, UnivariatePoly::new(vec![Fr::one(), Fr::one()])); + assert!(r.is_zero()); + } + + #[test] + fn divide_with_remainder_nonzero() { + // (x^2 + 1) / (x - 1): quotient = x + 1, remainder = 2 + let dividend = UnivariatePoly::new(vec![Fr::one(), Fr::zero(), Fr::one()]); + let divisor = UnivariatePoly::new(vec![-Fr::one(), Fr::one()]); + let (q, r) = dividend.divide_with_remainder(&divisor).unwrap(); + + // Verify: q * divisor + r == dividend + for x_val in 0..5u64 { + let x = Fr::from_u64(x_val); + assert_eq!( + q.evaluate(x) * divisor.evaluate(x) + r.evaluate(x), + dividend.evaluate(x), + ); + } + } + + #[test] + fn divide_by_zero_returns_none() { + let p = UnivariatePoly::new(vec![Fr::one()]); + assert!(p.divide_with_remainder(&UnivariatePoly::zero()).is_none()); + } + + #[test] + fn divide_lower_degree_returns_self_as_remainder() { + let dividend = UnivariatePoly::new(vec![Fr::from_u64(3)]); + let divisor = UnivariatePoly::new(vec![Fr::one(), Fr::one()]); + let (q, r) = dividend.divide_with_remainder(&divisor).unwrap(); + assert!(q.is_zero()); + assert_eq!(r, dividend); + } + + #[test] + fn from_evals_quadratic() { + // p(x) = 2x^2 + 3x + 1 → p(0)=1, p(1)=6, p(2)=15 + let evals = vec![Fr::from_u64(1), Fr::from_u64(6), Fr::from_u64(15)]; + let poly = UnivariatePoly::from_evals(&evals); + assert_eq!(poly.coefficients[0], Fr::from_u64(1)); + assert_eq!(poly.coefficients[1], Fr::from_u64(3)); + assert_eq!(poly.coefficients[2], Fr::from_u64(2)); + } + + #[test] + fn from_evals_cubic() { + // p(x) = x^3 + 2x^2 + 3x + 1 + let evals = vec![ + Fr::from_u64(1), + Fr::from_u64(7), + Fr::from_u64(23), + Fr::from_u64(55), + ]; + let poly = UnivariatePoly::from_evals(&evals); + assert_eq!(poly.coefficients[0], Fr::from_u64(1)); + 
assert_eq!(poly.coefficients[1], Fr::from_u64(3)); + assert_eq!(poly.coefficients[2], Fr::from_u64(2)); + assert_eq!(poly.coefficients[3], Fr::from_u64(1)); + } + + #[test] + fn from_evals_matches_interpolate_over_integers() { + let evals: Vec = vec![ + Fr::from_u64(7), + Fr::from_u64(3), + Fr::from_u64(11), + Fr::from_u64(2), + ]; + let via_evals = UnivariatePoly::from_evals(&evals); + let via_lagrange = UnivariatePoly::interpolate_over_integers(&evals); + + for i in 0..10u64 { + let x = Fr::from_u64(i); + assert_eq!( + via_evals.evaluate(x), + via_lagrange.evaluate(x), + "mismatch at x={i}" + ); + } + } + + #[test] + fn from_evals_and_hint() { + // p(x) = 2x^2 + 3x + 1 → p(0)=1, p(1)=6 + // hint = p(0) + p(1) = 7 + // Given evals at [0, 2] = [1, 15], recover p(1) = 7 - 1 = 6 + let hint = Fr::from_u64(7); + let evals = vec![Fr::from_u64(1), Fr::from_u64(15)]; + let poly = UnivariatePoly::from_evals_and_hint(hint, &evals); + + assert_eq!(poly.evaluate(Fr::from_u64(0)), Fr::from_u64(1)); + assert_eq!(poly.evaluate(Fr::from_u64(1)), Fr::from_u64(6)); + assert_eq!(poly.evaluate(Fr::from_u64(2)), Fr::from_u64(15)); + } + + #[test] + fn from_evals_toom_cubic() { + // p(x) = 9x^3 + 3x^2 + x + 5 + let gt_poly = UnivariatePoly::new(vec![ + Fr::from_u64(5), + Fr::from_u64(1), + Fr::from_u64(3), + Fr::from_u64(9), + ]); + let degree = 3; + let mut toom_evals: Vec = (0..degree) + .map(|x| gt_poly.evaluate(Fr::from_u64(x))) + .collect(); + // eval at ∞ = leading coefficient + toom_evals.push(*gt_poly.coefficients().last().unwrap()); + + let poly = UnivariatePoly::from_evals_toom(&toom_evals); + assert_eq!(gt_poly, poly); + } + + #[test] + fn from_linear_times_quadratic_with_hint() { + // s(x) = (x + 1) * (x^2 + 2x + 3) = x^3 + 3x^2 + 5x + 3 + // hint = s(0) + s(1) = 3 + 12 = 15 + let linear_coeffs = [Fr::from_u64(1), Fr::from_u64(1)]; + let q0 = Fr::from_u64(3); + let q2 = Fr::from_u64(1); + let hint = Fr::from_u64(15); + let poly = + 
UnivariatePoly::from_linear_times_quadratic_with_hint(linear_coeffs, q0, q2, hint); + + let expected = UnivariatePoly::new(vec![ + Fr::from_u64(3), + Fr::from_u64(5), + Fr::from_u64(3), + Fr::from_u64(1), + ]); + assert_eq!(poly, expected); + } +} diff --git a/crates/jolt-poly/tests/integration.rs b/crates/jolt-poly/tests/integration.rs new file mode 100644 index 000000000..4607fbc5e --- /dev/null +++ b/crates/jolt-poly/tests/integration.rs @@ -0,0 +1,302 @@ +//! Cross-type integration tests for jolt-poly. +//! +//! These tests verify composition patterns between polynomial types +//! (Polynomial, EqPolynomial, UnivariatePoly, IdentityPolynomial, RlcSource) +//! that are used throughout the proving system. + +use jolt_field::{Field, Fr}; +use jolt_poly::{ + EqPolynomial, IdentityPolynomial, MultilinearPoly, Polynomial, RlcSource, UnivariatePoly, +}; +use rand_chacha::ChaCha20Rng; +use rand_core::SeedableRng; + +// Polynomial ↔ EqPolynomial: the fundamental MLE identity + +/// ⟨f, eq(·, r)⟩ = f̃(r) for any multilinear f and point r. +#[test] +fn inner_product_with_eq_is_evaluation() { + let mut rng = ChaCha20Rng::seed_from_u64(1000); + for nv in 1..=6 { + let poly = Polynomial::::random(nv, &mut rng); + let point: Vec = (0..nv).map(|_| Fr::random(&mut rng)).collect(); + + let expected = poly.evaluate(&point); + + let eq_evals = EqPolynomial::new(point).evaluations(); + let inner: Fr = poly + .evaluations() + .iter() + .zip(eq_evals.iter()) + .map(|(a, b)| *a * *b) + .sum(); + + assert_eq!(inner, expected, "nv={nv}: inner product ≠ evaluate"); + } +} + +// Sequential binding converges to evaluate + +/// Binding all variables one-by-one yields the same result as evaluate. 
+#[test] +fn sequential_bind_equals_evaluate() { + let mut rng = ChaCha20Rng::seed_from_u64(2000); + for nv in 1..=5 { + let poly = Polynomial::::random(nv, &mut rng); + let point: Vec = (0..nv).map(|_| Fr::random(&mut rng)).collect(); + + let expected = poly.evaluate(&point); + + let mut working = poly.clone(); + for &r in &point { + working.bind(r); + } + assert_eq!(working.len(), 1); + assert_eq!(working.evaluations()[0], expected, "nv={nv}"); + } +} + +// Compact polynomial promotion + +/// Polynomial::bind_to_field agrees with Polynomial built from the same data. +#[test] +fn compact_u8_bind_matches_field_bind() { + let mut rng = ChaCha20Rng::seed_from_u64(3000); + let nv = 4; + let data: Vec = (0..1 << nv).map(|i| (i * 37 + 13) as u8).collect(); + let scalar = Fr::random(&mut rng); + + let compact = Polynomial::new(data.clone()); + let promoted = compact.bind_to_field::(scalar); + + let field_poly: Polynomial = + Polynomial::new(data.iter().map(|&x| Fr::from_u64(x as u64)).collect()); + let mut expected = field_poly; + expected.bind(scalar); + + assert_eq!( + promoted.evaluations(), + expected.evaluations(), + "compact bind_to_field must match field bind" + ); +} + +// UnivariatePoly interpolation + +/// Lagrange interpolation recovers the original polynomial at domain points. +#[test] +fn univariate_interpolation_recovers_points() { + let mut rng = ChaCha20Rng::seed_from_u64(4000); + let degree = 5; + let points: Vec<(Fr, Fr)> = (0..=degree) + .map(|i| (Fr::from_u64(i as u64), Fr::random(&mut rng))) + .collect(); + + let poly = UnivariatePoly::interpolate(&points); + + for (x, y) in &points { + let eval = poly.evaluate(*x); + assert_eq!(eval, *y, "interpolation must recover point at x={x:?}"); + } +} + +/// Interpolation over integers matches evaluate at integer domain. 
+#[test] +fn univariate_interpolation_over_integers() { + let evals = vec![ + Fr::from_u64(1), + Fr::from_u64(4), + Fr::from_u64(9), + Fr::from_u64(16), + ]; + let poly = UnivariatePoly::interpolate_over_integers(&evals); + + for (i, expected) in evals.iter().enumerate() { + let eval = poly.evaluate(Fr::from_u64(i as u64)); + assert_eq!(&eval, expected, "mismatch at domain point {i}"); + } +} + +// CompressedPoly round-trip + +/// compress → decompress preserves the polynomial. +#[test] +fn compressed_round_trip() { + let mut rng = ChaCha20Rng::seed_from_u64(5000); + let coeffs: Vec = (0..5).map(|_| Fr::random(&mut rng)).collect(); + let original = UnivariatePoly::new(coeffs); + let hint = original.evaluate(Fr::from_u64(0)) + original.evaluate(Fr::from_u64(1)); + + let compressed = original.compress(); + let recovered = compressed.decompress(hint); + + // Check evaluation at several points + for i in 0..10 { + let x = Fr::from_u64(i); + assert_eq!( + original.evaluate(x), + recovered.evaluate(x), + "compress/decompress mismatch at x={i}" + ); + } +} + +/// CompressedPoly::evaluate_with_hint matches the original polynomial. +#[test] +fn compressed_evaluate_with_hint() { + let mut rng = ChaCha20Rng::seed_from_u64(5001); + let coeffs: Vec = (0..4).map(|_| Fr::random(&mut rng)).collect(); + let poly = UnivariatePoly::new(coeffs); + let hint = poly.evaluate(Fr::from_u64(0)) + poly.evaluate(Fr::from_u64(1)); + let compressed = poly.compress(); + + for i in 0..8 { + let x = Fr::from_u64(i); + assert_eq!( + poly.evaluate(x), + compressed.evaluate_with_hint(hint, x), + "evaluate_with_hint mismatch at x={i}" + ); + } +} + +// IdentityPolynomial + +/// IdentityPolynomial maps Boolean hypercube points to their integer index. 
+#[test] +fn identity_polynomial_boolean_indexing() { + let nv = 4; + let id = IdentityPolynomial::new(nv); + + for idx in 0..(1 << nv) { + let bits: Vec = (0..nv) + .map(|j| { + if (idx >> (nv - 1 - j)) & 1 == 1 { + Fr::from_u64(1) + } else { + Fr::from_u64(0) + } + }) + .collect(); + let eval = id.evaluate::(&bits); + assert_eq!(eval, Fr::from_u64(idx as u64), "identity at index {idx}"); + } +} + +/// IdentityPolynomial at a random point matches manual computation. +#[test] +fn identity_polynomial_random_point() { + let mut rng = ChaCha20Rng::seed_from_u64(6000); + let nv = 5; + let id = IdentityPolynomial::new(nv); + let point: Vec = (0..nv).map(|_| Fr::random(&mut rng)).collect(); + + let eval = id.evaluate::(&point); + + // Manual: sum_i r_i * 2^(n-1-i) + let expected: Fr = point + .iter() + .enumerate() + .map(|(i, r)| *r * Fr::from_u64(1u64 << (nv - 1 - i))) + .sum(); + + assert_eq!(eval, expected); +} + +// RlcSource: lazy random linear combination + +/// RlcSource evaluation matches materializing and linearly combining. +#[test] +fn rlc_source_matches_materialized_combination() { + let mut rng = ChaCha20Rng::seed_from_u64(7000); + let nv = 3; + let num_polys = 4; + + let polys: Vec> = (0..num_polys) + .map(|_| Polynomial::::random(nv, &mut rng)) + .collect(); + let scalars: Vec = (0..num_polys).map(|_| Fr::random(&mut rng)).collect(); + let point: Vec = (0..nv).map(|_| Fr::random(&mut rng)).collect(); + + // Materialized: sum_i scalar_i * poly_i.evaluate(point) + let expected: Fr = polys + .iter() + .zip(scalars.iter()) + .map(|(p, s)| *s * p.evaluate(&point)) + .sum(); + + // Lazy via RlcSource + let rlc = RlcSource::new(polys, scalars); + let actual = rlc.evaluate(&point); + + assert_eq!(actual, expected); +} + +// Polynomial arithmetic + +/// Addition is commutative: a + b == b + a. 
+#[test] +fn polynomial_addition_commutative() { + let mut rng = ChaCha20Rng::seed_from_u64(8000); + let nv = 4; + let a = Polynomial::::random(nv, &mut rng); + let b = Polynomial::::random(nv, &mut rng); + + let ab = a.clone() + b.clone(); + let ba = b + a; + assert_eq!(ab.evaluations(), ba.evaluations()); +} + +/// Scalar multiplication distributes over addition: s*(a+b) == s*a + s*b. +#[test] +fn scalar_mul_distributes_over_addition() { + let mut rng = ChaCha20Rng::seed_from_u64(8001); + let nv = 3; + let a = Polynomial::::random(nv, &mut rng); + let b = Polynomial::::random(nv, &mut rng); + let s = Fr::random(&mut rng); + + let sum_then_scale = (a.clone() + b.clone()) * s; + let scale_then_sum = a * s + b * s; + assert_eq!(sum_then_scale.evaluations(), scale_then_sum.evaluations()); +} + +// Serialization + +/// bincode round-trip preserves a Polynomial. +#[test] +fn polynomial_bincode_round_trip() { + let mut rng = ChaCha20Rng::seed_from_u64(9000); + let nv = 5; + let poly = Polynomial::::random(nv, &mut rng); + + let bytes = + bincode::serde::encode_to_vec(&poly, bincode::config::standard()).expect("serialize"); + let recovered: Polynomial = + bincode::serde::decode_from_slice(&bytes, bincode::config::standard()) + .expect("deserialize") + .0; + + assert_eq!(poly.evaluations(), recovered.evaluations()); + assert_eq!(poly.num_vars(), recovered.num_vars()); +} + +/// bincode round-trip preserves UnivariatePoly. 
+#[test] +fn univariate_bincode_round_trip() { + let mut rng = ChaCha20Rng::seed_from_u64(9001); + let coeffs: Vec = (0..6).map(|_| Fr::random(&mut rng)).collect(); + let poly = UnivariatePoly::new(coeffs); + + let bytes = + bincode::serde::encode_to_vec(&poly, bincode::config::standard()).expect("serialize"); + let recovered: UnivariatePoly = + bincode::serde::decode_from_slice(&bytes, bincode::config::standard()) + .expect("deserialize") + .0; + + for i in 0..10 { + let x = Fr::from_u64(i); + assert_eq!(poly.evaluate(x), recovered.evaluate(x)); + } +} diff --git a/crates/jolt-profiling/Cargo.toml b/crates/jolt-profiling/Cargo.toml new file mode 100644 index 000000000..53a2c8329 --- /dev/null +++ b/crates/jolt-profiling/Cargo.toml @@ -0,0 +1,53 @@ +[package] +name = "jolt-profiling" +version = "0.1.0" +authors = ["Jolt Contributors"] +edition = "2021" +description = "Profiling and tracing infrastructure for the Jolt proving system" +license = "MIT OR Apache-2.0" +repository = "https://github.com/a16z/jolt" +keywords = ["profiling", "tracing", "performance"] +categories = ["development-tools::profiling"] +publish = false + +[features] +default = [] +monitor = ["dep:sysinfo"] +pprof = ["dep:pprof", "dep:prost"] +allocative = ["dep:inferno", "dep:allocative"] + +[dependencies] +tracing.workspace = true +tracing-chrome.workspace = true +tracing-subscriber.workspace = true + +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +memory-stats.workspace = true + +# Optional: system metrics monitoring +[target.'cfg(not(target_arch = "wasm32"))'.dependencies.sysinfo] +version = "0.37" +optional = true + +# Optional: CPU profiling via pprof +[target.'cfg(not(target_arch = "wasm32"))'.dependencies.pprof] +version = "0.15" +features = ["prost-codec", "flamegraph", "frame-pointer"] +optional = true + +[target.'cfg(not(target_arch = "wasm32"))'.dependencies.prost] +version = "0.14" +optional = true + +# Optional: heap flamegraphs +[dependencies.inferno] +workspace = true 
+optional = true + +[dependencies.allocative] +workspace = true +optional = true + +[package.metadata.cargo-machete] +# prost is required by pprof's prost-codec feature but not directly imported +ignored = ["prost"] diff --git a/crates/jolt-profiling/README.md b/crates/jolt-profiling/README.md new file mode 100644 index 000000000..0654a9000 --- /dev/null +++ b/crates/jolt-profiling/README.md @@ -0,0 +1,62 @@ +# jolt-profiling + +Profiling and tracing infrastructure for the Jolt proving system. + +Part of the [Jolt](https://github.com/a16z/jolt) zkVM. + +## Overview + +Provides a unified interface for performance analysis across all Jolt crates. Individual library crates instrument their functions with `#[tracing::instrument]`; the host binary depends on `jolt-profiling` to configure the subscriber that captures those spans. + +## Public API + +### Tracing Setup + +- **`setup_tracing(formats, trace_name)`** — Initializes the global tracing subscriber. Supports console output (`Default`) and Perfetto/Chrome JSON traces (`Chrome`). Returns flush guards that must be kept alive. +- **`TracingFormat`** — Output format enum: `Default` (console), `Chrome` (Perfetto JSON). + +### Memory Profiling + +- **`start_memory_tracing_span(label)` / `end_memory_tracing_span(label)`** — Tracks physical memory deltas across labeled code regions. +- **`report_memory_usage()`** — Logs all collected memory deltas and warns about unclosed spans. +- **`print_current_memory_usage(label)`** — Logs current physical memory at point of call. + +### System Metrics (`monitor` feature) + +- **`MetricsMonitor::start(interval_secs)`** — Spawns a background thread sampling CPU usage, memory, active cores, and thread count. Outputs structured `counters.*` fields for Perfetto postprocessing. + +### CPU Profiling (`pprof` feature) + +- **`pprof_scope!(label)`** — Creates a scoped CPU profiler guard that writes a `.pb` flamegraph on drop. 
+- **`PprofGuard`** — The underlying guard type (stub when `pprof` feature is off). + +### Heap Flamegraphs (`allocative` feature) + +- **`print_data_structure_heap_usage(label, data)`** — Logs heap size of `Allocative`-instrumented values. +- **`write_flamegraph_svg(flamegraph, path)`** — Renders an `allocative::FlameGraphBuilder` to an SVG file. + +## Feature Flags + +| Flag | Description | +|------|-------------| +| `monitor` | Background system metrics sampling (CPU, memory, cores) | +| `pprof` | Scoped CPU profiling via `pprof` with `.pb` output | +| `allocative` | Heap flamegraph generation from `allocative`-instrumented types | + +## Dependency Position + +``` +tracing ─┐ +tracing-chrome ─┤ +tracing-subscriber ─┼─► jolt-profiling +memory-stats ─┤ +sysinfo (opt) ─┤ +pprof (opt) ─┤ +allocative (opt) ─┘ +``` + +Imported by host binaries and benchmarks. Library crates depend only on `tracing`. + +## License + +MIT OR Apache-2.0 diff --git a/crates/jolt-profiling/src/flamegraph.rs b/crates/jolt-profiling/src/flamegraph.rs new file mode 100644 index 000000000..1ce95a9ca --- /dev/null +++ b/crates/jolt-profiling/src/flamegraph.rs @@ -0,0 +1,55 @@ +//! Heap flamegraph generation from `allocative`-instrumented data structures. + +use std::{fs::File, io::Cursor, path::Path}; + +use allocative::{Allocative, FlameGraphBuilder}; +use inferno::flamegraph::Options; + +use crate::units::{format_memory_size, BYTES_PER_GIB}; + +/// Logs the heap allocation size of an `Allocative`-instrumented value. +pub fn print_data_structure_heap_usage(label: &str, data: &T) { + if tracing::enabled!(tracing::Level::DEBUG) { + let memory_gib = allocative::size_of_unique_allocated_data(data) as f64 / BYTES_PER_GIB; + tracing::debug!( + label = label, + usage = %format_memory_size(memory_gib), + "heap allocation size" + ); + } +} + +/// Renders a [`FlameGraphBuilder`] to an SVG flamegraph file. +/// +/// Uses `inferno` for rendering with MiB units and flame-chart mode. 
+/// Logs a warning and returns on I/O failure instead of panicking.
+pub fn write_flamegraph_svg<P: AsRef<Path>>(flamegraph: FlameGraphBuilder, path: P) {
+    let mut opts = Options::default();
+    opts.color_diffusion = true;
+    opts.count_name = String::from("MiB");
+    opts.factor = 1.0 / BYTES_PER_GIB * 1024.0;
+    opts.flame_chart = true;
+
+    let flamegraph_src = flamegraph.finish_and_write_flame_graph();
+    let input = Cursor::new(flamegraph_src);
+
+    let output = match File::create(path.as_ref()) {
+        Ok(f) => f,
+        Err(e) => {
+            tracing::warn!(
+                path = %path.as_ref().display(),
+                error = %e,
+                "failed to create flamegraph SVG file"
+            );
+            return;
+        }
+    };
+
+    if let Err(e) = inferno::flamegraph::from_reader(&mut opts, input, output) {
+        tracing::warn!(
+            path = %path.as_ref().display(),
+            error = %e,
+            "failed to render flamegraph SVG"
+        );
+    }
+}
diff --git a/crates/jolt-profiling/src/lib.rs b/crates/jolt-profiling/src/lib.rs
new file mode 100644
index 000000000..e55dc533c
--- /dev/null
+++ b/crates/jolt-profiling/src/lib.rs
@@ -0,0 +1,59 @@
+//! Profiling and tracing infrastructure for the Jolt proving system.
+//!
+//! Provides a unified interface for performance analysis across all Jolt crates:
+//!
+//! - **Tracing subscriber setup** — configures `tracing-chrome` (Perfetto/Chrome JSON)
+//!   and `tracing-subscriber` (console output) for the host binary.
+//! - **Memory profiling** — tracks memory deltas across proving stages via `memory-stats`.
+//! - **System metrics monitoring** (`monitor` feature) — background thread sampling
+//!   CPU usage, memory, active cores, and thread count. Outputs structured counter events
+//!   compatible with the Perfetto postprocessing script.
+//! - **CPU profiling** (`pprof` feature) — scoped `pprof` guards that write `.pb`
+//!   flamegraph files on drop.
+//! - **Heap flamegraphs** (`allocative` feature) — generates SVG flamegraphs from
+//!   `allocative`-instrumented data structures.
+//!
+//! # Usage
+//!
+//! 
Individual crates add `tracing` as a dependency and instrument their functions with +//! `#[tracing::instrument]`. The host binary (e.g. `jolt-zkvm` CLI) depends on +//! `jolt-profiling` to configure the subscriber that captures those spans. +//! +//! ```no_run +//! use jolt_profiling::{setup_tracing, TracingFormat}; +//! +//! let _guards = setup_tracing( +//! &[TracingFormat::Chrome], +//! "my_benchmark_20260306", +//! ); +//! // All tracing spans from any Jolt crate now flow to Perfetto JSON output. +//! ``` + +pub mod setup; + +#[cfg(not(target_arch = "wasm32"))] +pub mod memory; + +#[cfg(all(not(target_arch = "wasm32"), feature = "monitor"))] +pub mod monitor; + +mod pprof_guard; + +#[cfg(feature = "allocative")] +pub mod flamegraph; + +mod units; + +pub use setup::{setup_tracing, TracingFormat, TracingGuards}; +pub use units::{format_memory_size, BYTES_PER_GIB, BYTES_PER_MIB}; + +#[cfg(not(target_arch = "wasm32"))] +pub use memory::{ + end_memory_tracing_span, print_current_memory_usage, report_memory_usage, + start_memory_tracing_span, +}; + +#[cfg(all(not(target_arch = "wasm32"), feature = "monitor"))] +pub use monitor::MetricsMonitor; + +pub use pprof_guard::PprofGuard; diff --git a/crates/jolt-profiling/src/memory.rs b/crates/jolt-profiling/src/memory.rs new file mode 100644 index 000000000..507893605 --- /dev/null +++ b/crates/jolt-profiling/src/memory.rs @@ -0,0 +1,125 @@ +//! Memory profiling utilities. +//! +//! Tracks physical memory deltas across labeled spans. Call +//! [`start_memory_tracing_span`] before the section and +//! [`end_memory_tracing_span`] after, then [`report_memory_usage`] to +//! log all collected deltas. 
+
+use memory_stats::memory_stats;
+use std::{
+    collections::BTreeMap,
+    sync::{LazyLock, Mutex},
+};
+
+use crate::units::{format_memory_size, BYTES_PER_GIB};
+
+static MEMORY_USAGE_MAP: LazyLock<Mutex<BTreeMap<&'static str, f64>>> =
+    LazyLock::new(|| Mutex::new(BTreeMap::new()));
+static MEMORY_DELTA_MAP: LazyLock<Mutex<BTreeMap<&'static str, f64>>> =
+    LazyLock::new(|| Mutex::new(BTreeMap::new()));
+
+/// Records the current physical memory usage at the start of a labeled span.
+///
+/// Logs a warning and returns without recording if memory stats are unavailable.
+///
+/// # Panics
+///
+/// Panics if a span with the same label is already open (nested spans need distinct labels).
+pub fn start_memory_tracing_span(label: &'static str) {
+    let Some(stats) = memory_stats() else {
+        tracing::warn!(
+            span = label,
+            "memory stats unavailable, skipping span start"
+        );
+        return;
+    };
+    let memory_gib = stats.physical_mem as f64 / BYTES_PER_GIB;
+    let mut map = MEMORY_USAGE_MAP.lock().unwrap();
+    assert_eq!(
+        map.insert(label, memory_gib),
+        None,
+        "duplicate memory span label: {label}"
+    );
+}
+
+/// Closes a labeled memory span and records the memory delta (in GiB).
+///
+/// Logs a warning and returns without recording if memory stats are unavailable.
+///
+/// # Panics
+///
+/// Panics if no span with the given label was previously opened.
+pub fn end_memory_tracing_span(label: &'static str) {
+    let Some(stats) = memory_stats() else {
+        tracing::warn!(span = label, "memory stats unavailable, skipping span end");
+        return;
+    };
+    let memory_gib_end = stats.physical_mem as f64 / BYTES_PER_GIB;
+    let mut memory_usage_map = MEMORY_USAGE_MAP.lock().unwrap();
+    let memory_gib_start = memory_usage_map
+        .remove(label)
+        .unwrap_or_else(|| panic!("no open memory span: {label}"));
+
+    let delta = memory_gib_end - memory_gib_start;
+    let mut memory_delta_map = MEMORY_DELTA_MAP.lock().unwrap();
+    assert_eq!(memory_delta_map.insert(label, delta), None);
+}
+
+/// Logs all collected memory deltas and warns about any unclosed spans.
+pub fn report_memory_usage() { + let memory_usage_map = MEMORY_USAGE_MAP.lock().unwrap(); + for label in memory_usage_map.keys() { + tracing::warn!(span = label, "unclosed memory tracing span"); + } + + let memory_delta_map = MEMORY_DELTA_MAP.lock().unwrap(); + for (label, delta) in memory_delta_map.iter() { + tracing::info!( + span = label, + delta = %format_memory_size(*delta), + "memory delta" + ); + } +} + +/// Logs the current physical memory usage at the point of call. +pub fn print_current_memory_usage(label: &str) { + if tracing::enabled!(tracing::Level::DEBUG) { + if let Some(usage) = memory_stats() { + let memory_gib = usage.physical_mem as f64 / BYTES_PER_GIB; + tracing::debug!( + label = label, + usage = %format_memory_size(memory_gib), + "current memory usage" + ); + } else { + tracing::debug!(label = label, "memory stats unavailable"); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn memory_span_start_end_records_delta() { + start_memory_tracing_span("test_span_lifecycle"); + end_memory_tracing_span("test_span_lifecycle"); + let map = MEMORY_DELTA_MAP.lock().unwrap(); + assert!(map.contains_key("test_span_lifecycle")); + } + + #[test] + #[should_panic(expected = "duplicate memory span label")] + fn duplicate_span_label_panics() { + start_memory_tracing_span("test_span_dup"); + start_memory_tracing_span("test_span_dup"); + } + + #[test] + #[should_panic(expected = "no open memory span")] + fn end_without_start_panics() { + end_memory_tracing_span("test_span_nonexistent"); + } +} diff --git a/crates/jolt-profiling/src/monitor.rs b/crates/jolt-profiling/src/monitor.rs new file mode 100644 index 000000000..5f3232dba --- /dev/null +++ b/crates/jolt-profiling/src/monitor.rs @@ -0,0 +1,99 @@ +//! Background system metrics monitor. +//! +//! Spawns a thread that periodically samples CPU usage, memory, active cores, +//! and thread count. Metrics are emitted as `tracing::debug!` events with +//! 
structured `counters.*` fields, compatible with the Perfetto postprocessing
+//! script (`scripts/postprocess_trace.py`).
+
+use memory_stats::memory_stats;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
+use std::thread::{self, JoinHandle};
+use std::time::Duration;
+use sysinfo::System;
+
+use crate::units::BYTES_PER_GIB;
+
+/// Background monitor that samples system metrics at a fixed interval.
+///
+/// Drop the monitor to terminate the background thread. The destructor
+/// signals the thread and joins it.
+#[must_use = "monitor stops when dropped"]
+pub struct MetricsMonitor {
+    handle: Option<JoinHandle<()>>,
+    stop_flag: Arc<AtomicBool>,
+}
+
+impl MetricsMonitor {
+    /// Starts the monitor with the given sampling interval (in seconds).
+    ///
+    /// Spawns a background thread named `"metrics-monitor"` that logs:
+    /// - `counters.memory_gib` — physical memory usage
+    /// - `counters.cpu_percent` — global CPU utilization
+    /// - `counters.cores_active_avg` — average active cores
+    /// - `counters.cores_active` — cores with >0.1% usage
+    /// - `counters.thread_count` — active thread count (Linux only, 0 elsewhere)
+    pub fn start(interval_secs: f64) -> Self {
+        let stop_flag = Arc::new(AtomicBool::new(false));
+        let stop = stop_flag.clone();
+
+        let handle = thread::Builder::new()
+            .name("metrics-monitor".to_string())
+            .spawn(move || {
+                let interval = Duration::from_millis((interval_secs * 1000.0) as u64);
+                let mut system = System::new_all();
+
+                thread::sleep(sysinfo::MINIMUM_CPU_UPDATE_INTERVAL);
+
+                while !stop.load(Ordering::Relaxed) {
+                    system.refresh_all();
+
+                    let memory_gib = memory_stats()
+                        .map(|s| s.physical_mem as f64 / BYTES_PER_GIB)
+                        .unwrap_or(0.0);
+                    let cpu_percent = system.global_cpu_usage();
+                    let cores_active_avg = cpu_percent / 100.0 * (system.cpus().len() as f32);
+                    let active_cores = system
+                        .cpus()
+                        .iter()
+                        .filter(|cpu| cpu.cpu_usage() > 0.1)
+                        .count();
+
+                    #[cfg(target_os = "linux")]
+                    let active_threads = 
std::fs::read_dir("/proc/self/task") + .map(|entries| entries.count()) + .unwrap_or(0); + + #[cfg(not(target_os = "linux"))] + let active_threads = 0_usize; + + tracing::debug!( + counters.memory_gib = memory_gib, + counters.cpu_percent = cpu_percent, + counters.cores_active_avg = cores_active_avg, + counters.cores_active = active_cores, + counters.thread_count = active_threads, + ); + + thread::sleep(interval); + } + + tracing::info!("MetricsMonitor stopping"); + }) + .expect("Failed to spawn metrics monitor thread"); + + MetricsMonitor { + handle: Some(handle), + stop_flag, + } + } +} + +impl Drop for MetricsMonitor { + fn drop(&mut self) { + self.stop_flag.store(true, Ordering::Relaxed); + if let Some(handle) = self.handle.take() { + let _ = handle.join(); + } + } +} diff --git a/crates/jolt-profiling/src/pprof_guard.rs b/crates/jolt-profiling/src/pprof_guard.rs new file mode 100644 index 000000000..f019d60ec --- /dev/null +++ b/crates/jolt-profiling/src/pprof_guard.rs @@ -0,0 +1,144 @@ +//! Scoped CPU profiler guard for `pprof` integration. +//! +//! Use the [`pprof_scope!`] macro to create a guard that starts a CPU profiler +//! on creation and writes a `.pb` flamegraph file on drop. +//! +//! Requires the `pprof` feature. Without it, the macro expands to `None::`. +//! +//! ```no_run +//! use jolt_profiling::pprof_scope; +//! +//! let _guard = pprof_scope!("my_function"); +//! // ... profiled code ... +//! // guard drops here, writing benchmark-runs/pprof/my_function.pb +//! ``` +//! +//! View with: `go tool pprof -http=:8080 benchmark-runs/pprof/my_function.pb` + +/// Guard that holds a running pprof profiler and writes output on drop. +#[cfg(feature = "pprof")] +pub struct PprofGuard { + guard: pprof::ProfilerGuard<'static>, + label: &'static str, +} + +#[cfg(feature = "pprof")] +impl PprofGuard { + /// Creates a new profiler guard with the given label and sampling frequency. + /// + /// The label determines the output filename: `{PPROF_PREFIX}{label}.pb`. 
+ /// Typically called via the [`pprof_scope!`] macro rather than directly. + pub fn new(label: &'static str, frequency: i32) -> Self { + Self { + guard: pprof::ProfilerGuardBuilder::default() + .frequency(frequency) + .blocklist(&["libc", "libgcc", "pthread", "vdso"]) + .build() + .expect("Failed to initialize profiler"), + label, + } + } +} + +/// Stub type when `pprof` feature is not enabled. +#[cfg(not(feature = "pprof"))] +pub struct PprofGuard; + +#[cfg(feature = "pprof")] +impl Drop for PprofGuard { + fn drop(&mut self) { + use std::io::Write; + + let Ok(report) = self.guard.report().build() else { + tracing::warn!(label = self.label, "failed to build pprof report"); + return; + }; + + let prefix = crate::setup::PPROF_PREFIX + .get() + .map(String::as_str) + .unwrap_or("benchmark-runs/pprof/"); + let filename = format!("{prefix}{}.pb", self.label); + + if let Some(dir) = std::path::Path::new(&filename).parent() { + let _ = std::fs::create_dir_all(dir); + } + + let Ok(mut f) = std::fs::File::create(&filename) else { + tracing::warn!(path = %filename, "failed to create pprof output file"); + return; + }; + + if let Ok(p) = report.pprof() { + use pprof::protos::Message; + let mut buf = Vec::new(); + if p.encode(&mut buf).is_ok() { + if f.write_all(&buf).is_ok() { + tracing::info!(path = %filename, "wrote pprof profile"); + } else { + tracing::warn!(path = %filename, "failed to write pprof data"); + } + } + } + } +} + +/// Creates a scoped CPU profiler guard. +/// +/// With the `pprof` feature enabled, returns `Some(PprofGuard)` that writes a +/// `.pb` file on drop. Without the feature, returns `None::`. +/// +/// When called without arguments, uses `"default"` as the label. +/// +/// Configure via environment variables: +/// - `PPROF_PREFIX` — output directory prefix (default: `"benchmark-runs/pprof/"`) +/// - `PPROF_FREQ` — sampling frequency in Hz (default: 100) +#[macro_export] +macro_rules! 
pprof_scope { + ($label:expr) => {{ + #[cfg(feature = "pprof")] + { + Some($crate::PprofGuard::new( + $label, + std::env::var("PPROF_FREQ") + .unwrap_or_else(|_| "100".to_string()) + .parse::() + .unwrap_or(100), + )) + } + #[cfg(not(feature = "pprof"))] + None::<$crate::PprofGuard> + }}; + () => { + $crate::pprof_scope!("default") + }; +} + +#[cfg(test)] +mod tests { + #[test] + fn pprof_scope_without_feature_returns_none() { + let guard = pprof_scope!("test_label"); + #[cfg(not(feature = "pprof"))] + assert!(guard.is_none()); + #[cfg(feature = "pprof")] + assert!(guard.is_some()); + } + + #[test] + fn pprof_scope_no_arg_variant() { + let guard = pprof_scope!(); + #[cfg(not(feature = "pprof"))] + assert!(guard.is_none()); + #[cfg(feature = "pprof")] + assert!(guard.is_some()); + } + + #[test] + fn pprof_guard_stub_exists() { + #[cfg(not(feature = "pprof"))] + { + let _guard = super::PprofGuard; + } + } +} diff --git a/crates/jolt-profiling/src/setup.rs b/crates/jolt-profiling/src/setup.rs new file mode 100644 index 000000000..6a16c61da --- /dev/null +++ b/crates/jolt-profiling/src/setup.rs @@ -0,0 +1,139 @@ +//! Tracing subscriber configuration for Perfetto and console output. +//! +//! Call [`setup_tracing`] once at binary startup. The returned [`TracingGuards`] +//! must be held alive for the duration of the program — dropping them flushes +//! and closes trace files. + +use std::any::Any; +use std::sync::OnceLock; + +use tracing_chrome::ChromeLayerBuilder; +use tracing_subscriber::{fmt::format::FmtSpan, prelude::*, EnvFilter}; + +/// Thread-safe storage for the pprof output prefix. +/// +/// Initialized once during [`setup_tracing`] and read by [`PprofGuard`](crate::PprofGuard) +/// on drop. Avoids `std::env::set_var` which is unsound in multi-threaded contexts. +pub(crate) static PPROF_PREFIX: OnceLock = OnceLock::new(); + +/// Output format for tracing subscribers. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum TracingFormat {
+    /// Console output with span close events and compact formatting.
+    Default,
+    /// Chrome/Perfetto JSON trace file. View at <https://ui.perfetto.dev/>.
+    Chrome,
+}
+
+/// Opaque container for tracing flush guards.
+///
+/// Must be held alive for the duration of profiling. Dropping this flushes
+/// all pending trace data and stops background monitors.
+#[must_use = "guards must be held alive for the duration of profiling"]
+pub struct TracingGuards(#[allow(dead_code)] Vec<Box<dyn Any>>);
+
+/// Initializes the global tracing subscriber with the requested output formats.
+///
+/// Always installs a minimal log layer that respects `RUST_LOG`. Additional
+/// layers are added based on the `formats` slice.
+///
+/// Returns a [`TracingGuards`] value that **must be kept alive** until the
+/// program exits. Dropping the guards flushes pending trace data.
+///
+/// # Chrome format
+///
+/// Writes to `benchmark-runs/perfetto_traces/{trace_name}.json`.
+/// Open in [Perfetto UI](https://ui.perfetto.dev/) for timeline visualization.
+///
+/// # Panics
+///
+/// Panics if called more than once (the global subscriber can only be set once).
+pub fn setup_tracing(formats: &[TracingFormat], trace_name: &str) -> TracingGuards { + PPROF_PREFIX.get_or_init(|| { + std::env::var("PPROF_PREFIX") + .unwrap_or_else(|_| format!("benchmark-runs/pprof/{trace_name}_")) + }); + + let mut layers = Vec::new(); + + let log_layer = tracing_subscriber::fmt::layer() + .compact() + .with_target(false) + .with_file(false) + .with_line_number(false) + .with_thread_ids(false) + .with_thread_names(false) + .with_filter(EnvFilter::from_default_env()) + .boxed(); + layers.push(log_layer); + + let mut guards: Vec> = vec![]; + + if formats.contains(&TracingFormat::Default) { + let collector_layer = tracing_subscriber::fmt::layer() + .with_span_events(FmtSpan::CLOSE) + .compact() + .with_target(false) + .with_file(false) + .with_line_number(false) + .with_thread_ids(false) + .with_thread_names(false) + .boxed(); + layers.push(collector_layer); + } + if formats.contains(&TracingFormat::Chrome) { + let trace_file = format!("benchmark-runs/perfetto_traces/{trace_name}.json"); + std::fs::create_dir_all("benchmark-runs/perfetto_traces").ok(); + let (chrome_layer, guard) = ChromeLayerBuilder::new() + .include_args(true) + .file(trace_file) + .build(); + layers.push(chrome_layer.boxed()); + guards.push(Box::new(guard)); + tracing::info!( + "Chrome tracing enabled. 
Output: benchmark-runs/perfetto_traces/{trace_name}.json" + ); + } + + tracing_subscriber::registry().with(layers).init(); + + #[cfg(all(not(target_arch = "wasm32"), feature = "monitor"))] + guards.push(Box::new({ + tracing::info!( + "Starting MetricsMonitor — run python3 scripts/postprocess_trace.py on the output" + ); + crate::monitor::MetricsMonitor::start( + std::env::var("MONITOR_INTERVAL") + .unwrap_or_else(|_| "0.1".to_string()) + .parse::() + .unwrap_or(0.1), + ) + })); + + TracingGuards(guards) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tracing_format_is_copy() { + let fmt = TracingFormat::Chrome; + let fmt2 = fmt; + assert_eq!(fmt, fmt2); + } + + #[test] + fn tracing_format_debug() { + let fmt = TracingFormat::Default; + let s = format!("{fmt:?}"); + assert_eq!(s, "Default"); + } + + #[test] + fn tracing_format_eq() { + assert_eq!(TracingFormat::Chrome, TracingFormat::Chrome); + assert_ne!(TracingFormat::Chrome, TracingFormat::Default); + } +} diff --git a/crates/jolt-profiling/src/units.rs b/crates/jolt-profiling/src/units.rs new file mode 100644 index 000000000..e8b657a77 --- /dev/null +++ b/crates/jolt-profiling/src/units.rs @@ -0,0 +1,55 @@ +//! Memory size unit constants and formatting helpers. + +/// Bytes per gibibyte (GiB, binary, 2^30). +pub const BYTES_PER_GIB: f64 = 1_073_741_824.0; + +/// Bytes per mebibyte (MiB, binary, 2^20). +pub const BYTES_PER_MIB: f64 = 1_048_576.0; + +/// Formats a memory size given in GiB to a human-readable string. +/// +/// Uses GiB for values >= 1.0, otherwise MiB. 
+pub fn format_memory_size(gib: f64) -> String { + if gib >= 1.0 { + format!("{gib:.2} GiB") + } else { + format!("{:.2} MiB", gib * 1024.0) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn format_large_value_uses_gib() { + assert_eq!(format_memory_size(2.5), "2.50 GiB"); + } + + #[test] + fn format_exactly_one_gib() { + assert_eq!(format_memory_size(1.0), "1.00 GiB"); + } + + #[test] + fn format_small_value_uses_mib() { + assert_eq!(format_memory_size(0.5), "512.00 MiB"); + } + + #[test] + fn format_zero() { + assert_eq!(format_memory_size(0.0), "0.00 MiB"); + } + + #[test] + fn format_tiny_value() { + let result = format_memory_size(0.001); + assert!(result.contains("MiB")); + } + + #[test] + fn constants_are_correct() { + assert_eq!(BYTES_PER_GIB, (1u64 << 30) as f64); + assert_eq!(BYTES_PER_MIB, (1u64 << 20) as f64); + } +} diff --git a/crates/jolt-transcript/Cargo.toml b/crates/jolt-transcript/Cargo.toml new file mode 100644 index 000000000..be6183d87 --- /dev/null +++ b/crates/jolt-transcript/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "jolt-transcript" +version = "0.1.0" +authors = ["Jolt Contributors"] +edition = "2021" +description = "Fiat-Shamir transcript implementations for Jolt" +repository = "https://github.com/a16z/jolt" +license = "MIT OR Apache-2.0" +keywords = ["SNARK", "cryptography", "proofs", "fiat-shamir"] +categories = ["cryptography", "no-std"] + +[lints] +workspace = true + +[dependencies] +ark-bn254.workspace = true +ark-ff.workspace = true +ark-serialize.workspace = true +blake2.workspace = true +digest = "0.10" +light-poseidon = "0.4" +sha3.workspace = true +jolt-field = { path = "../jolt-field" } + +[dev-dependencies] +num-traits = { workspace = true } +criterion = { workspace = true } + +[[bench]] +name = "transcript_ops" +harness = false diff --git a/crates/jolt-transcript/README.md b/crates/jolt-transcript/README.md new file mode 100644 index 000000000..5e00e655c --- /dev/null +++ 
b/crates/jolt-transcript/README.md @@ -0,0 +1,36 @@ +# jolt-transcript + +Fiat-Shamir transcript implementations for Jolt. + +Part of the [Jolt](https://github.com/a16z/jolt) zkVM. + +## Overview + +This crate provides hash-based Fiat-Shamir transcripts that convert interactive proof protocols into non-interactive ones. The transcript maintains a 256-bit running state, absorbs prover messages via hashing, and squeezes deterministic challenges for the verifier. + +Three hash backends are provided. All produce 128-bit challenges (drawn from `u128`) and use a `state || round_counter` domain separation scheme. + +## Public API + +### Core Traits + +- **`Transcript`** -- The main transcript trait. Methods: `new(label)`, `append_bytes(bytes)`, `append(value)`, `challenge()`, `challenge_vector(len)`, `state()`. +- **`AppendToTranscript`** -- Trait for types that can be absorbed into a transcript. + +### Implementations + +- **`Blake2bTranscript`** -- Uses Blake2b-256. Default choice for Jolt proofs. +- **`KeccakTranscript`** -- Uses Keccak-256. EVM-compatible for on-chain verification. +- **`PoseidonTranscript`** -- Uses Poseidon over BN254. SNARK-friendly for recursive verification. + +## Dependency Position + +`jolt-transcript` depends on `jolt-field` (for the blanket `AppendToTranscript` impl on `Field` types). It is used by `jolt-crypto`, `jolt-sumcheck`, `jolt-openings`, `jolt-dory`, `jolt-blindfold`, and `jolt-zkvm`. + +## Feature Flags + +This crate has no feature flags. 
+ +## License + +MIT OR Apache-2.0 diff --git a/crates/jolt-transcript/REVIEW.md b/crates/jolt-transcript/REVIEW.md new file mode 100644 index 000000000..dbb281a28 --- /dev/null +++ b/crates/jolt-transcript/REVIEW.md @@ -0,0 +1,290 @@ +# jolt-transcript Review + +**Crate:** jolt-transcript (Level 1) +**LOC:** 672 +**Baseline:** 0 clippy warnings, 38 tests passing, 1 fuzz target, 2 benchmarks + +## Overview + +Fiat-Shamir transcript crate providing the `Transcript` trait and three implementations: +Blake2b, Keccak, and Poseidon. Used by 14 downstream crates as the Fiat-Shamir backbone. + +**Verdict:** Well-structured crate with good test coverage and clean macro-based code reuse. +Relatively few issues — mostly minor hygiene items. + +--- + +## Findings + +### [CD-1.1] PoseidonTranscript not exported from lib.rs + +**File:** `src/lib.rs` +**Severity:** MEDIUM +**Finding:** `PoseidonTranscript` is defined in `src/poseidon.rs` but the module is not +declared in `lib.rs`. It's completely dead code — unreachable from outside the crate. +The `mod poseidon;` line is missing, and `PoseidonTranscript` is not in the `pub use` list. + +**Suggested fix:** Either add `mod poseidon; pub use poseidon::PoseidonTranscript;` to +`lib.rs`, or delete the file if Poseidon support isn't needed yet. If kept, it pulls in +`ark-bn254`, `ark-ff`, `ark-serialize`, `light-poseidon` — none of which are in +`Cargo.toml` dependencies, so it would fail to compile if the module were enabled. + +**Status:** [x] RESOLVED — Added `mod poseidon` to lib.rs, added ark-bn254/ark-ff/ark-serialize/light-poseidon deps, exported `PoseidonTranscript`, added integration tests (17 tests via macro). Removed unused import in inline tests. + +--- + +### [CD-3.1] `hex` dependency used only for Debug impl + +**File:** `Cargo.toml`, `src/impl_transcript.rs:49` +**Severity:** LOW +**Finding:** The `hex` crate is a dependency solely for the `Debug` impl's +`hex::encode(self.state)`. 
This could use `format!("{:02x}", ...)` or the standard +`write!` with `{:x}` formatting to avoid the external dep. + +**Suggested fix:** Remove `hex` dependency, use inline hex formatting: +```rust +fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct(stringify!($name)) + .field("state", &format_args!("{:02x?}", self.state)) + .field("n_rounds", &self.n_rounds) + .finish() +} +``` + +**Status:** [x] RESOLVED — Removed `hex` dep, replaced `hex::encode` with `format_args!("{:02x?}", ...)` in both macro-generated and Poseidon Debug impls. + +--- + +### [CD-3.2] `digest` version not workspace-managed + +**File:** `Cargo.toml:16` +**Severity:** LOW +**Finding:** `digest = "0.4"` is pinned locally while `blake2` and `sha3` use workspace +versions. If the workspace has a `digest` entry, this should use `workspace = true` for +consistency. If not, the version `"0.4"` looks wrong — the `digest` crate used by +`blake2` 0.10 and `sha3` 0.10 is digest `0.10`, not `0.4`. + +**Suggested fix:** Verify the correct digest version and either add to workspace or fix +the version string. + +**Status:** [x] RESOLVED — Was already fixed to `"0.10"` before review started. + +--- + +### [CQ-1.1] Label length check uses magic number 33 + +**File:** `src/impl_transcript.rs:115`, `src/poseidon.rs:150` +**Severity:** LOW +**Finding:** `assert!(label.len() < 33)` — the constant 33 relates to the 32-byte state +buffer + 1, but isn't named. Also the check is `< 33` which means max length is 32, but +the error says "less than 33 bytes" which is confusing for the user. + +**Suggested fix:** Extract a constant and improve the message: +```rust +const MAX_LABEL_LEN: usize = 32; +assert!(label.len() <= MAX_LABEL_LEN, "label must be at most {MAX_LABEL_LEN} bytes"); +``` + +**Status:** [x] RESOLVED — Extracted `MAX_LABEL_LEN = 32` constant in transcript.rs. Updated assertion in macro and poseidon.rs to use `<= MAX_LABEL_LEN` with clear message. 
Updated `should_panic` test expected string. + +--- + +### [CQ-1.2] `n_rounds` can overflow u32 + +**File:** `src/impl_transcript.rs:95` +**Severity:** LOW +**Finding:** `self.n_rounds += 1` can panic on overflow in debug mode or wrap in release. +For a transcript with >4 billion operations this is unrealistic in practice, but a +`checked_add` or `wrapping_add` would make the intent explicit. + +**Status:** [ ] PASS — unrealistic in practice, not worth the complexity. + +--- + +### [CQ-3.1] Poseidon duplicates macro structure manually + +**File:** `src/poseidon.rs` +**Severity:** MEDIUM +**Finding:** `PoseidonTranscript` manually reimplements the same struct pattern (state, +n_rounds, test_state, PhantomData, Debug, Default, update_state, challenge_bytes, +challenge_bytes32) that the `impl_transcript!` macro provides for Blake2b/Keccak. +The only difference is the hash function — Poseidon uses `light_poseidon` instead of +`digest::Digest`. + +This is either intentional (Poseidon's API doesn't fit the `Digest` trait) or an oversight. +Given that the file isn't even compiled (see CD-1.1), this is moot unless Poseidon is enabled. + +**Status:** [x] RESOLVED (via CD-1.1 — Poseidon now enabled) + +--- + +### [CQ-4.1] `impl_transcript!` macro vs trait default methods + +**File:** `src/impl_transcript.rs` +**Severity:** LOW +**Finding:** The macro generates `challenge_bytes`, `challenge_bytes32`, `hasher`, and +`update_state` as inherent methods on each transcript type. These could be trait default +methods or a shared inner struct, avoiding macro-generated code duplication in the binary. + +However, the macro approach is idiomatic for hash-algorithm-parameterized types and keeps +the binary slim (monomorphization). The only real downside is that the `#[cfg(test)]` +state tracking is duplicated in Poseidon. Since there are only 2-3 impls, this is acceptable. + +**Status:** [ ] PASS — macro approach is fine for this scale. 
+ +--- + +### [CQ-7.1] `poseidon.rs` has extensive docs but is dead code + +**File:** `src/poseidon.rs` +**Severity:** LOW +**Finding:** The module has thorough `//!` docs explaining Poseidon's purpose, parameters, +and domain separation — but it's unreachable dead code. The docs give the impression Poseidon +is a supported feature. + +**Status:** [x] RESOLVED (via CD-1.1 — Poseidon now enabled, docs are live) + +--- + +### [CQ-8.1] No Poseidon tests in the integration test suite + +**File:** `tests/` +**Severity:** LOW +**Finding:** There's no `tests/poseidon_tests.rs` using the `transcript_tests!` macro. +The Poseidon module has its own inline tests, but they're not reachable since the module +isn't compiled. If Poseidon is enabled, it should get the standard test suite. + +**Status:** [x] RESOLVED — Added `tests/poseidon_tests.rs` using the `transcript_tests!` macro (17 tests). + +--- + +### [NIT-1.1] `#[allow(unused_imports)]` in keccak_tests.rs + +**File:** `tests/keccak_tests.rs:7` +**Severity:** LOW +**Finding:** `#[allow(unused_imports)]` on `use num_traits::Zero` — if it's unused, remove +the import. If it's used via the macro, the allow is masking a real dependency. + +**Suggested fix:** Remove both the allow and the import if unused, or remove just the allow +if used. + +**Status:** [x] RESOLVED — Removed `#[allow(unused_imports)]`, kept the `use num_traits::Zero` import which IS used in `test_keccak_known_vector`. + +--- + +### [NIT-4.1] `challenge_bytes` loop uses `> 32` not `>= 32` + +**File:** `src/impl_transcript.rs:72` +**Severity:** LOW +**Finding:** The loop `while remaining > 32` then handles the final chunk separately. 
+When `remaining == 32`, it falls through to the final chunk path which does: +```rust +let mut final_chunk = [0u8; 32]; +self.challenge_bytes32(&mut final_chunk); +out[offset..offset + remaining].copy_from_slice(&final_chunk[..remaining]); +``` +This works correctly (32 bytes copied from 32-byte chunk), but the condition could be +`while remaining > BYTES_PER_CHUNK` using the constant from poseidon.rs (or a shared one) +for clarity. Minor. + +**Status:** [ ] PASS — correct as-is. + +--- + +### [CD-2.1] `Transcript::new` takes `&'static [u8]` but could take `&[u8]` + +**File:** `src/transcript.rs:31` +**Severity:** LOW +**Finding:** The `'static` lifetime on the label is restrictive. Labels are immediately +hashed into the state and not stored. `&[u8]` would be sufficient and more flexible for +callers that construct labels at runtime. + +However, `&'static [u8]` is a deliberate safety choice — it forces labels to be compile-time +constants, preventing accidental dynamic labels that could cause protocol-level bugs. This +is a sound design decision for a cryptographic transcript. + +**Status:** [ ] PASS — `'static` is intentional for safety. + +--- + +### [CD-2.2] `AppendToTranscript` blanket impl for slices missing + +**File:** `src/blanket.rs` +**Severity:** LOW +**Finding:** There's a blanket impl for `F: Field` but no impl for common types like +`&[u8]`, `u64`, `u32`, `bool`, `Vec`, `[u8; N]`, etc. The crate-level example shows +`transcript.append(&42u64)` and `transcript.append(&[1u8, 2, 3, 4])` — these would fail +unless `u64` and `[u8; 4]` implement `AppendToTranscript`. + +Looking at downstream usage, callers use `transcript.append_bytes()` directly for raw bytes +and `transcript.append(&field_element)` for field elements. So the missing impls don't +block real usage. But the doc example in `lib.rs` lines 27-28 would fail to compile. 
+ +**Suggested fix:** Either add impls for `u64`, `&[u8]`, and `[u8; N]`, or fix the doc +example to use `append_bytes` for non-field types. + +**Status:** [x] RESOLVED — Fixed doc example to use `Fr::from_u64(42)` with `transcript.append(&value)` and `append_bytes` for raw bytes. + +--- + +### [CD-6.1] Downstream usage is clean + +**Severity:** PASS +**Finding:** Examined all 14 downstream crate usages: +- `Transcript` trait bound used pervasively as `T: Transcript` generic param +- `AppendToTranscript` used as supertrait on `JoltGroup`, `JoltCommitment` +- `Blake2bTranscript` used as concrete type in tests/benchmarks +- No workarounds or redundant re-implementations found +- The trait surface is exactly what downstream needs — minimal and sufficient + +**Status:** [x] PASS + +--- + +### [CD-5.1] Missing `poseidon` dependencies in Cargo.toml + +**File:** `Cargo.toml` +**Severity:** MEDIUM +**Finding:** `poseidon.rs` imports `ark_bn254`, `ark_ff`, `ark_serialize`, and +`light_poseidon`, but none of these are listed in `[dependencies]`. The file only compiles +because the module isn't enabled. If `mod poseidon` is added to `lib.rs`, this will fail. + +**Status:** [x] RESOLVED (via CD-1.1 — deps added, module compiles) + +--- + +### [CQ-2.1] `round_bytes` zero-pad wastes 28 bytes + +**File:** `src/impl_transcript.rs:60-61` +**Severity:** LOW +**Finding:** The hasher domain separation uses `round_bytes: [u8; 32]` where only the +last 4 bytes are populated with `n_rounds.to_be_bytes()`. The leading 28 zero bytes +provide no additional domain separation. A `[u8; 4]` would be sufficient and clearer. + +However, using 32 bytes aligns with the hash block size and makes the domain separation +consistent in width with the state. This is a defensible choice. 
+ +**Status:** [ ] PASS + +--- + +## Summary + +| Severity | Count | Resolved | Pass/WontFix | +|----------|-------|----------|-------------| +| HIGH | 0 | 0 | 0 | +| MEDIUM | 3 | 3 | 0 | +| LOW | 10 | 7 | 3 | +| **Total** | **13** | **10** | **3** | + +**Final state:** 0 clippy warnings, 65 tests passing (was 38), 1 doc test passing, 1 fuzz target. + +### Changes made: + +1. **Poseidon enabled** — added `mod poseidon` + `pub use PoseidonTranscript` to lib.rs; added ark-bn254, ark-ff, ark-serialize, light-poseidon deps; added integration test suite (17 tests); fixed unused import in inline tests +2. **`hex` dep removed** — replaced with `format_args!("{:02x?}", ...)` in Debug impls +3. **Doc example fixed** — uses `Fr::from_u64()` and `append_bytes` instead of broken `append(&42u64)` +4. **`MAX_LABEL_LEN` constant** — extracted to `transcript.rs`, used in macro and poseidon.rs +5. **`digest` version** — was already correct (`"0.10"`) +6. **keccak_tests.rs** — removed stale `#[allow(unused_imports)]` +7. 
**`should_panic` test** — updated expected message to match new assertion diff --git a/crates/jolt-transcript/benches/transcript_ops.rs b/crates/jolt-transcript/benches/transcript_ops.rs new file mode 100644 index 000000000..a87609723 --- /dev/null +++ b/crates/jolt-transcript/benches/transcript_ops.rs @@ -0,0 +1,68 @@ +#![allow(unused_results)] + +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; +use jolt_field::Fr; +use jolt_transcript::{Blake2bTranscript, KeccakTranscript, Transcript}; + +fn bench_append_bytes(c: &mut Criterion) { + let mut group = c.benchmark_group("append_bytes"); + let data_32 = [0xABu8; 32]; + let data_256 = [0xCDu8; 256]; + + for (label, data) in [("32B", &data_32[..]), ("256B", &data_256[..])] { + group.bench_with_input(BenchmarkId::new("Blake2b", label), data, |bench, data| { + bench.iter_batched( + || Blake2bTranscript::::new(b"bench"), + |mut t| { + t.append_bytes(black_box(data)); + t + }, + criterion::BatchSize::SmallInput, + ); + }); + group.bench_with_input(BenchmarkId::new("Keccak", label), data, |bench, data| { + bench.iter_batched( + || KeccakTranscript::::new(b"bench"), + |mut t| { + t.append_bytes(black_box(data)); + t + }, + criterion::BatchSize::SmallInput, + ); + }); + } + group.finish(); +} + +fn bench_challenge(c: &mut Criterion) { + let mut group = c.benchmark_group("challenge"); + + group.bench_function("Blake2b", |bench| { + bench.iter_batched( + || { + let mut t = Blake2bTranscript::::new(b"bench"); + t.append_bytes(&[42u8; 32]); + t + }, + |mut t| t.challenge(), + criterion::BatchSize::SmallInput, + ); + }); + + group.bench_function("Keccak", |bench| { + bench.iter_batched( + || { + let mut t = KeccakTranscript::::new(b"bench"); + t.append_bytes(&[42u8; 32]); + t + }, + |mut t| t.challenge(), + criterion::BatchSize::SmallInput, + ); + }); + + group.finish(); +} + +criterion_group!(benches, bench_append_bytes, bench_challenge); +criterion_main!(benches); diff --git 
a/crates/jolt-transcript/fuzz/Cargo.toml b/crates/jolt-transcript/fuzz/Cargo.toml new file mode 100644 index 000000000..7ee293485 --- /dev/null +++ b/crates/jolt-transcript/fuzz/Cargo.toml @@ -0,0 +1,19 @@ +[workspace] + +[package] +name = "jolt-transcript-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +jolt-transcript = { path = ".." } + +[[bin]] +name = "transcript_no_panic" +path = "fuzz_targets/transcript_no_panic.rs" +doc = false diff --git a/crates/jolt-transcript/fuzz/fuzz_targets/transcript_no_panic.rs b/crates/jolt-transcript/fuzz/fuzz_targets/transcript_no_panic.rs new file mode 100644 index 000000000..0b6d696de --- /dev/null +++ b/crates/jolt-transcript/fuzz/fuzz_targets/transcript_no_panic.rs @@ -0,0 +1,19 @@ +#![no_main] +use jolt_transcript::{Blake2bTranscript, KeccakTranscript, Transcript}; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + // Blake2b: append arbitrary bytes + squeeze challenge — must never panic + let mut blake = Blake2bTranscript::default(); + blake.append_bytes(data); + let _ = blake.challenge(); + blake.append_bytes(data); + let _ = blake.challenge(); + + // Keccak: same exercise + let mut keccak = KeccakTranscript::default(); + keccak.append_bytes(data); + let _ = keccak.challenge(); + keccak.append_bytes(data); + let _ = keccak.challenge(); +}); diff --git a/crates/jolt-transcript/fuzz/rust-toolchain.toml b/crates/jolt-transcript/fuzz/rust-toolchain.toml new file mode 100644 index 000000000..5d56faf9a --- /dev/null +++ b/crates/jolt-transcript/fuzz/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly" diff --git a/crates/jolt-transcript/src/blake2b.rs b/crates/jolt-transcript/src/blake2b.rs new file mode 100644 index 000000000..15ea3d28a --- /dev/null +++ b/crates/jolt-transcript/src/blake2b.rs @@ -0,0 +1,11 @@ +//! Blake2b-256 based Fiat-Shamir transcript. 
+
+use blake2::digest::consts::U32;
+use blake2::Blake2b;
+use digest::Digest;
+
+use crate::impl_transcript::impl_transcript;
+
+type Blake2b256 = Blake2b<U32>;
+
+impl_transcript!(Blake2bTranscript, Blake2b256, Blake2b256::new());
diff --git a/crates/jolt-transcript/src/blanket.rs b/crates/jolt-transcript/src/blanket.rs
new file mode 100644
index 000000000..423f487df
--- /dev/null
+++ b/crates/jolt-transcript/src/blanket.rs
@@ -0,0 +1,15 @@
+//! Blanket implementation of [`AppendToTranscript`] for field elements.
+
+use jolt_field::Field;
+
+use crate::transcript::{AppendToTranscript, Transcript};
+
+/// Absorbs any field element as big-endian bytes (reversed from the canonical
+/// LE layout) for EVM compatibility.
+impl<F: Field> AppendToTranscript for F {
+    fn append_to_transcript<T: Transcript>(&self, transcript: &mut T) {
+        let mut buf = self.to_bytes();
+        buf.reverse();
+        transcript.append_bytes(&buf);
+    }
+}
diff --git a/crates/jolt-transcript/src/impl_transcript.rs b/crates/jolt-transcript/src/impl_transcript.rs
new file mode 100644
index 000000000..42ed0e047
--- /dev/null
+++ b/crates/jolt-transcript/src/impl_transcript.rs
@@ -0,0 +1,166 @@
+//! Macro for implementing the Transcript trait with different hash functions.
+
+/// Implements the `Transcript` trait for a hash-based transcript.
+///
+/// The generated struct is generic over `F: Field`, producing field-element
+/// challenges directly via `F::from_u128()`.
+macro_rules! impl_transcript {
+    ($name:ident, $hasher:ty, $new_hasher:expr) => {
+        use $crate::transcript::Transcript;
+
+        /// Internal state for test-time transcript comparison.
+        #[cfg(test)]
+        #[derive(Clone, Default)]
+        struct TestState {
+            state_history: Vec<[u8; 32]>,
+            expected_state_history: Option<Vec<[u8; 32]>>,
+        }
+
+        #[doc = concat!("Fiat-Shamir transcript backed by ", stringify!($hasher), ".")]
+        ///
+        /// Generic over the field type `F`. Challenges are produced as field
+        /// elements directly via `F::from_u128()`.
+        #[derive(Clone)]
+        pub struct $name<F> {
+            /// 256-bit running state.
+            state: [u8; 32],
+            /// Round counter for domain separation.
+            n_rounds: u32,
+            /// Test-only state for transcript comparison.
+            #[cfg(test)]
+            test_state: TestState,
+            _field: std::marker::PhantomData<F>,
+        }
+
+        impl<F> Default for $name<F> {
+            fn default() -> Self {
+                Self {
+                    state: [0u8; 32],
+                    n_rounds: 0,
+                    #[cfg(test)]
+                    test_state: TestState::default(),
+                    _field: std::marker::PhantomData,
+                }
+            }
+        }
+
+        impl<F> std::fmt::Debug for $name<F> {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                f.debug_struct(stringify!($name))
+                    .field("state", &format_args!("{:02x?}", self.state))
+                    .field("n_rounds", &self.n_rounds)
+                    .finish()
+            }
+        }
+
+        impl<F> $name<F> {
+            /// Returns a hasher seeded with `state || round_counter` for domain separation.
+            #[inline]
+            fn hasher(&self) -> $hasher {
+                let mut round_bytes = [0u8; 32];
+                round_bytes[28..].copy_from_slice(&self.n_rounds.to_be_bytes());
+                <$hasher as Digest>::new()
+                    .chain_update(self.state)
+                    .chain_update(round_bytes)
+            }
+
+            /// Fills `out` with challenge bytes, using `ceil(len / 32)` hash invocations.
+            fn challenge_bytes(&mut self, out: &mut [u8]) {
+                let mut remaining = out.len();
+                let mut offset = 0;
+
+                while remaining > 32 {
+                    let mut chunk = [0u8; 32];
+                    self.challenge_bytes32(&mut chunk);
+                    out[offset..offset + 32].copy_from_slice(&chunk);
+                    offset += 32;
+                    remaining -= 32;
+                }
+
+                let mut final_chunk = [0u8; 32];
+                self.challenge_bytes32(&mut final_chunk);
+                out[offset..offset + remaining].copy_from_slice(&final_chunk[..remaining]);
+            }
+
+            /// Squeezes exactly 32 bytes from the transcript state.
+            #[inline]
+            fn challenge_bytes32(&mut self, out: &mut [u8; 32]) {
+                let hash: [u8; 32] = self.hasher().finalize().into();
+                out.copy_from_slice(&hash);
+                self.update_state(hash);
+            }
+
+            fn update_state(&mut self, new_state: [u8; 32]) {
+                self.state = new_state;
+                self.n_rounds += 1;
+
+                #[cfg(test)]
+                {
+                    if let Some(ref expected) = self.test_state.expected_state_history {
+                        assert_eq!(
+                            new_state, expected[self.n_rounds as usize],
+                            "Fiat-Shamir transcript mismatch at round {}",
+                            self.n_rounds
+                        );
+                    }
+                    self.test_state.state_history.push(new_state);
+                }
+            }
+        }
+
+        impl<F: jolt_field::Field> Transcript for $name<F> {
+            type Challenge = F;
+
+            fn new(label: &'static [u8]) -> Self {
+                assert!(
+                    label.len() <= $crate::transcript::MAX_LABEL_LEN,
+                    "label must be at most {} bytes",
+                    $crate::transcript::MAX_LABEL_LEN,
+                );
+
+                let mut padded = [0u8; $crate::transcript::MAX_LABEL_LEN];
+                padded[..label.len()].copy_from_slice(label);
+
+                let hash: [u8; 32] = <$hasher as Digest>::new()
+                    .chain_update(padded)
+                    .finalize()
+                    .into();
+
+                Self {
+                    state: hash,
+                    n_rounds: 0,
+                    #[cfg(test)]
+                    test_state: TestState {
+                        state_history: vec![hash],
+                        expected_state_history: None,
+                    },
+                    _field: std::marker::PhantomData,
+                }
+            }
+
+            fn append_bytes(&mut self, bytes: &[u8]) {
+                let hash: [u8; 32] = self.hasher().chain_update(bytes).finalize().into();
+                self.update_state(hash);
+            }
+
+            fn challenge(&mut self) -> F {
+                let mut buf = [0u8; 16];
+                self.challenge_bytes(&mut buf);
+                F::from_u128(u128::from_le_bytes(buf))
+            }
+
+            #[inline]
+            fn state(&self) -> &[u8; 32] {
+                &self.state
+            }
+
+            #[cfg(test)]
+            fn compare_to(&mut self, other: &Self) {
+                self.test_state.expected_state_history =
+                    Some(other.test_state.state_history.clone());
+            }
+        }
+    };
+}
+
+pub(crate) use impl_transcript;
diff --git a/crates/jolt-transcript/src/keccak.rs b/crates/jolt-transcript/src/keccak.rs
new file mode 100644
index 000000000..35f788943
--- /dev/null
+++ b/crates/jolt-transcript/src/keccak.rs
@@ -0,0 +1,8 @@
+//! Keccak-256 based Fiat-Shamir transcript (Ethereum/EVM compatible).
+
+use digest::Digest;
+use sha3::Keccak256;
+
+use crate::impl_transcript::impl_transcript;
+
+impl_transcript!(KeccakTranscript, Keccak256, Keccak256::new());
diff --git a/crates/jolt-transcript/src/lib.rs b/crates/jolt-transcript/src/lib.rs
new file mode 100644
index 000000000..e4cd13ffc
--- /dev/null
+++ b/crates/jolt-transcript/src/lib.rs
@@ -0,0 +1,53 @@
+//! Fiat-Shamir transcript implementations for Jolt.
+//!
+//! This crate provides the [`Transcript`] trait and implementations for
+//! transforming interactive proofs into non-interactive ones via the
+//! Fiat-Shamir heuristic.
+//!
+//! # Overview
+//!
+//! A Fiat-Shamir transcript absorbs data and produces deterministic challenges.
+//! Both prover and verifier maintain identical transcripts, ensuring they
+//! derive the same challenges.
+//!
+//! # Implementations
+//!
+//! - [`Blake2bTranscript`]: Uses Blake2b-256 hash function
+//! - [`KeccakTranscript`]: Ethereum/EVM-compatible, uses Keccak-256
+//! - [`PoseidonTranscript`]: SNARK-friendly, uses Poseidon over BN254
+//!
+//! # Example
+//!
+//! ```
+//! use jolt_transcript::{Transcript, Blake2bTranscript};
+//! use jolt_field::{Field, Fr};
+//!
+//! let mut transcript = Blake2bTranscript::<Fr>::new(b"my_protocol");
+//!
+//! // Absorb field elements using append (AppendToTranscript)
+//! let value = Fr::from_u64(42);
+//! transcript.append(&value);
+//!
+//! // Absorb raw bytes directly
+//! transcript.append_bytes(b"raw bytes");
+//!
+//! // Squeeze a challenge — returns Fr directly
+//! let challenge: Fr = transcript.challenge();
+//! ```
+
+#![deny(missing_docs)]
+#![deny(clippy::all)]
+#![warn(clippy::pedantic)]
+#![allow(clippy::module_name_repetitions)]
+
+mod blake2b;
+mod blanket;
+mod impl_transcript;
+mod keccak;
+mod poseidon;
+mod transcript;
+
+pub use blake2b::Blake2bTranscript;
+pub use keccak::KeccakTranscript;
+pub use poseidon::PoseidonTranscript;
+pub use transcript::{AppendToTranscript, Transcript};
diff --git a/crates/jolt-transcript/src/poseidon.rs b/crates/jolt-transcript/src/poseidon.rs
new file mode 100644
index 000000000..39be07253
--- /dev/null
+++ b/crates/jolt-transcript/src/poseidon.rs
@@ -0,0 +1,349 @@
+//! Poseidon-based Fiat-Shamir transcript for SNARK-friendly verification.
+//!
+//! Uses width-3 Poseidon (3 field element inputs) over BN254 Fr with
+//! circom-compatible parameters via [`light_poseidon`]. Each hash operation:
+//! `state = poseidon(state, n_rounds, data)`.
+//!
+//! # Why Poseidon?
+//!
+//! Poseidon is ~600x cheaper in-circuit than Keccak (~250 constraints vs
+//! ~150,000). When the Jolt verifier runs inside a Groth16/gnark circuit,
+//! all Fiat-Shamir challenges must be recomputed — using a SNARK-friendly
+//! hash makes this feasible.
+//!
+//! # Parameters
+//!
+//! - **Width**: 3 field elements (state, round counter, data)
+//! - **Curve**: BN254 scalar field (Fr)
+//! - **Constants**: circom-compatible (`light_poseidon::new_circom`)
+//! - **Rounds**: 8 full + 56 partial, x^5 S-box
+//!
+//! # Domain separation
+//!
+//! Each `append_bytes` call includes an `n_rounds` counter in the hash input
+//! for domain separation. Multi-chunk appends chain: first chunk includes
+//! `n_rounds`, remaining chunks chain as `poseidon(prev, 0, chunk_i)`.
+
+use ark_bn254::Fr;
+use ark_ff::{PrimeField, Zero};
+use ark_serialize::CanonicalSerialize;
+use light_poseidon::{Poseidon, PoseidonHasher};
+
+use crate::transcript::Transcript;
+
+/// Poseidon hash width: 3 field elements.
+const WIDTH: usize = 3;
+
+/// Bytes per BN254 Fr field element.
+const BYTES_PER_CHUNK: usize = 32;
+
+/// Fiat-Shamir transcript using Poseidon hash over BN254.
+///
+/// Generic over the field type `F`. Challenges are produced as field
+/// elements directly via `F::from_u128()`.
+#[derive(Clone)]
+pub struct PoseidonTranscript<F> {
+    /// 256-bit running state (canonical LE serialization of Fr).
+    state: [u8; 32],
+    /// Round counter for domain separation.
+    n_rounds: u32,
+    /// Test-only state history for transcript comparison.
+    #[cfg(test)]
+    state_history: Vec<[u8; 32]>,
+    #[cfg(test)]
+    expected_state_history: Option<Vec<[u8; 32]>>,
+    _field: std::marker::PhantomData<F>,
+}
+
+impl<F> Default for PoseidonTranscript<F> {
+    fn default() -> Self {
+        Self {
+            state: [0u8; 32],
+            n_rounds: 0,
+            #[cfg(test)]
+            state_history: Vec::new(),
+            #[cfg(test)]
+            expected_state_history: None,
+            _field: std::marker::PhantomData,
+        }
+    }
+}
+
+impl<F> std::fmt::Debug for PoseidonTranscript<F> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("PoseidonTranscript")
+            .field("state", &format_args!("{:02x?}", self.state))
+            .field("n_rounds", &self.n_rounds)
+            .finish_non_exhaustive()
+    }
+}
+
+impl<F> PoseidonTranscript<F> {
+    fn hasher() -> Poseidon<Fr> {
+        Poseidon::<Fr>::new_circom(WIDTH).expect("failed to initialize Poseidon")
+    }
+
+    /// Squeeze exactly 32 challenge bytes: `poseidon(state, n_rounds, 0)`.
+    fn challenge_bytes32(&mut self, out: &mut [u8; 32]) {
+        let mut poseidon = Self::hasher();
+        let state_f = Fr::from_le_bytes_mod_order(&self.state);
+        let round_f = Fr::from(u64::from(self.n_rounds));
+        let zero = Fr::zero();
+
+        let output = poseidon
+            .hash(&[state_f, round_f, zero])
+            .expect("Poseidon hash failed");
+
+        output
+            .serialize_uncompressed(&mut out[..])
+            .expect("Fr serialization failed");
+
+        self.update_state(*out);
+    }
+
+    /// Fill `out` with challenge bytes using ceil(len / 32) hash invocations.
+ fn challenge_bytes(&mut self, out: &mut [u8]) { + let mut remaining = out.len(); + let mut offset = 0; + + while remaining > BYTES_PER_CHUNK { + let mut chunk = [0u8; 32]; + self.challenge_bytes32(&mut chunk); + out[offset..offset + BYTES_PER_CHUNK].copy_from_slice(&chunk); + offset += BYTES_PER_CHUNK; + remaining -= BYTES_PER_CHUNK; + } + + let mut final_chunk = [0u8; 32]; + self.challenge_bytes32(&mut final_chunk); + out[offset..offset + remaining].copy_from_slice(&final_chunk[..remaining]); + } + + fn update_state(&mut self, new_state: [u8; 32]) { + self.state = new_state; + self.n_rounds += 1; + + #[cfg(test)] + { + if let Some(ref expected) = self.expected_state_history { + assert!( + (self.n_rounds as usize) < expected.len(), + "Fiat-Shamir transcript: n_rounds {} exceeds expected history length {}", + self.n_rounds, + expected.len() + ); + assert_eq!( + new_state, expected[self.n_rounds as usize], + "Fiat-Shamir transcript mismatch at round {}", + self.n_rounds + ); + } + self.state_history.push(new_state); + } + } +} + +impl Transcript for PoseidonTranscript { + type Challenge = F; + + fn new(label: &'static [u8]) -> Self { + use crate::transcript::MAX_LABEL_LEN; + assert!( + label.len() <= MAX_LABEL_LEN, + "label must be at most {MAX_LABEL_LEN} bytes", + ); + + let mut poseidon = Self::hasher(); + let label_f = Fr::from_le_bytes_mod_order(label); + let zero = Fr::zero(); + + let initial = poseidon + .hash(&[label_f, zero, zero]) + .expect("Poseidon hash failed"); + + let mut state = [0u8; 32]; + initial + .serialize_uncompressed(&mut state[..]) + .expect("Fr serialization failed"); + + Self { + state, + n_rounds: 0, + #[cfg(test)] + state_history: vec![state], + #[cfg(test)] + expected_state_history: None, + _field: std::marker::PhantomData, + } + } + + fn append_bytes(&mut self, bytes: &[u8]) { + let mut poseidon = Self::hasher(); + let state_f = Fr::from_le_bytes_mod_order(&self.state); + let round_f = Fr::from(u64::from(self.n_rounds)); + let zero = 
Fr::zero(); + + let mut chunks = bytes.chunks(BYTES_PER_CHUNK); + + let first_f = chunks.next().map_or(zero, Fr::from_le_bytes_mod_order); + + let mut current = poseidon + .hash(&[state_f, round_f, first_f]) + .expect("Poseidon hash failed"); + + for chunk in chunks { + let chunk_f = Fr::from_le_bytes_mod_order(chunk); + current = poseidon + .hash(&[current, zero, chunk_f]) + .expect("Poseidon hash failed"); + } + + let mut new_state = [0u8; 32]; + current + .serialize_uncompressed(&mut new_state[..]) + .expect("Fr serialization failed"); + + self.update_state(new_state); + } + + fn challenge(&mut self) -> F { + let mut buf = [0u8; 16]; + self.challenge_bytes(&mut buf); + F::from_u128(u128::from_le_bytes(buf)) + } + + #[inline] + fn state(&self) -> &[u8; 32] { + &self.state + } + + #[cfg(test)] + fn compare_to(&mut self, other: &Self) { + self.expected_state_history = Some(other.state_history.clone()); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + type Poseidon = PoseidonTranscript; + + #[test] + fn new_initializes_from_label() { + let t1 = Poseidon::new(b"protocol_a"); + let t2 = Poseidon::new(b"protocol_b"); + + assert_ne!(t1.state, t2.state); + assert_eq!(t1.n_rounds, 0); + } + + #[test] + fn same_label_same_state() { + let t1 = Poseidon::new(b"same"); + let t2 = Poseidon::new(b"same"); + + assert_eq!(t1.state, t2.state); + } + + #[test] + fn append_changes_state() { + let mut t = Poseidon::new(b"test"); + let before = t.state; + + t.append_bytes(b"hello"); + assert_ne!(t.state, before); + assert_eq!(t.n_rounds, 1); + } + + #[test] + fn append_order_matters() { + let mut t1 = Poseidon::new(b"test"); + let mut t2 = Poseidon::new(b"test"); + + t1.append_bytes(b"a"); + t1.append_bytes(b"b"); + + t2.append_bytes(b"b"); + t2.append_bytes(b"a"); + + assert_ne!(t1.state, t2.state); + } + + #[test] + fn challenge_advances_state() { + let mut t = Poseidon::new(b"test"); + t.append_bytes(b"data"); + let before = t.state; + + let _ = t.challenge(); + 
assert_ne!(t.state, before); + } + + #[test] + fn deterministic_challenges() { + let mut t1 = Poseidon::new(b"test"); + let mut t2 = Poseidon::new(b"test"); + + t1.append_bytes(b"same_data"); + t2.append_bytes(b"same_data"); + + assert_eq!(t1.challenge(), t2.challenge()); + } + + #[test] + fn multi_chunk_append() { + let mut t = Poseidon::new(b"test"); + + let data = [0xABu8; 64]; + t.append_bytes(&data); + + assert_eq!(t.n_rounds, 1); + } + + #[test] + fn challenge_vector_produces_distinct() { + let mut t = Poseidon::new(b"test"); + t.append_bytes(b"seed"); + + let challenges: Vec = t.challenge_vector(5); + for i in 0..5 { + for j in (i + 1)..5 { + assert_ne!(challenges[i], challenges[j]); + } + } + } + + #[test] + fn clone_independence() { + let mut t = Poseidon::new(b"test"); + t.append_bytes(b"shared"); + + let mut fork = t.clone(); + t.append_bytes(b"branch_a"); + fork.append_bytes(b"branch_b"); + + assert_ne!(t.state, fork.state); + } + + #[test] + fn hash_zeros_produces_known_output() { + let mut poseidon = PoseidonTranscript::::hasher(); + let result = poseidon + .hash(&[Fr::zero(), Fr::zero(), Fr::zero()]) + .expect("hash failed"); + assert_ne!(result, Fr::zero(), "hash(0,0,0) should not be zero"); + } + + #[test] + fn transcript_comparison() { + let mut prover = Poseidon::new(b"test"); + prover.append_bytes(b"data"); + let _ = prover.challenge(); + + let mut verifier = Poseidon::new(b"test"); + verifier.compare_to(&prover); + verifier.append_bytes(b"data"); + let _ = verifier.challenge(); + } +} diff --git a/crates/jolt-transcript/src/transcript.rs b/crates/jolt-transcript/src/transcript.rs new file mode 100644 index 000000000..5b581f831 --- /dev/null +++ b/crates/jolt-transcript/src/transcript.rs @@ -0,0 +1,81 @@ +//! Core traits for Fiat-Shamir transcript transformation. +//! +//! This module provides the [`Transcript`] trait for building Fiat-Shamir transcripts +//! and the [`AppendToTranscript`] trait for types that can be absorbed into a transcript. 
+
+/// Fiat-Shamir transcript for non-interactive proofs.
+///
+/// A transcript absorbs data and produces deterministic challenges. Both prover
+/// and verifier maintain identical transcripts to derive the same challenges,
+/// transforming an interactive proof into a non-interactive one.
+///
+/// Hash-based transcripts (`Blake2bTranscript`, `KeccakTranscript`) are generic
+/// over `F: Field` and produce field-element challenges directly.
+///
+/// # Security
+///
+/// Domain separation is provided via the label in [`new`](Transcript::new).
+/// Use unique labels per protocol to prevent cross-protocol attacks.
+pub trait Transcript: Default + Clone + Sync + Send + 'static {
+    /// The challenge type produced by this transcript.
+    ///
+    /// For hash-based transcripts this is `F` (the field type), so challenges
+    /// can be used directly in polynomial operations without conversion.
+    type Challenge: Copy + Default;
+
+    /// Creates a new transcript with the given domain separation label.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `label.len() >= 33`.
+    fn new(label: &'static [u8]) -> Self;
+
+    /// Absorbs raw bytes into the transcript.
+    ///
+    /// Prefer [`append`](Transcript::append)
+    /// for a type-safe/ergonomic absorption of data.
+    fn append_bytes(&mut self, bytes: &[u8]);
+
+    /// Absorbs a value into the transcript.
+    ///
+    /// This is the primary method for adding data to the transcript. Any type
+    /// implementing [`AppendToTranscript`] can be absorbed.
+    fn append<A: AppendToTranscript>(&mut self, value: &A) {
+        value.append_to_transcript(self);
+    }
+
+    /// Squeezes a challenge from the transcript.
+    ///
+    /// Each call produces a new challenge and advances the transcript state.
+    #[must_use]
+    fn challenge(&mut self) -> Self::Challenge;
+
+    /// Squeezes multiple challenges from the transcript.
+    #[must_use]
+    fn challenge_vector(&mut self, len: usize) -> Vec<Self::Challenge> {
+        (0..len).map(|_| self.challenge()).collect()
+    }
+
+    /// Returns the current 256-bit transcript state.
+ /// + /// Useful for debugging and testing transcript synchronization. + #[must_use] + fn state(&self) -> &[u8; 32]; + + /// Enables transcript comparison for testing. + /// + /// After calling this, the transcript will panic if its state ever diverges + /// from the expected state history recorded in `other`. + #[cfg(test)] + fn compare_to(&mut self, other: &Self); +} + +/// Maximum label length in bytes. Labels are padded to this size before hashing. +pub const MAX_LABEL_LEN: usize = 32; + +/// Implement this trait to define how your type serializes into transcript bytes. +/// This keeps the [`Transcript`] trait decoupled from specific serialization formats. +pub trait AppendToTranscript { + /// Absorbs this value into the transcript. + fn append_to_transcript<T: Transcript>(&self, transcript: &mut T); +} diff --git a/crates/jolt-transcript/tests/blake2b_tests.rs b/crates/jolt-transcript/tests/blake2b_tests.rs new file mode 100644 index 000000000..3dba55874 --- /dev/null +++ b/crates/jolt-transcript/tests/blake2b_tests.rs @@ -0,0 +1,83 @@ +//! Tests for Blake2bTranscript implementation.
+ +mod common; + +use jolt_field::Fr; +use jolt_transcript::Blake2bTranscript; +use num_traits::Zero; + +type B2b = Blake2bTranscript<Fr>; + +transcript_tests!(B2b); + +#[test] +fn test_blake2b_known_vector() { + use jolt_transcript::Transcript; + + let mut transcript = Blake2bTranscript::<Fr>::new(b"Jolt"); + transcript.append_bytes(&12345u64.to_be_bytes()); + + let challenge: Fr = transcript.challenge(); + + assert!(!challenge.is_zero()); + + let mut transcript2 = Blake2bTranscript::<Fr>::new(b"Jolt"); + transcript2.append_bytes(&12345u64.to_be_bytes()); + assert_eq!(challenge, transcript2.challenge()); +} + +#[test] +fn test_blake2b_state_accessor() { + use jolt_transcript::Transcript; + + let transcript = Blake2bTranscript::<Fr>::new(b"test"); + let state = transcript.state(); + + assert_eq!(state.len(), 32); + + assert!(!state.iter().all(|&b| b == 0)); +} + +#[test] +fn test_field_zero_one_distinct_states() { + use jolt_field::Fr; + use jolt_transcript::{AppendToTranscript, Transcript}; + use num_traits::{One, Zero}; + + let mut t_zero = Blake2bTranscript::<Fr>::new(b"field_test"); + Fr::zero().append_to_transcript(&mut t_zero); + let c_zero: Fr = t_zero.challenge(); + + let mut t_one = Blake2bTranscript::<Fr>::new(b"field_test"); + Fr::one().append_to_transcript(&mut t_one); + let c_one: Fr = t_one.challenge(); + + assert_ne!( + c_zero, c_one, + "Fr::zero() and Fr::one() must produce distinct transcript states" + ); +} + +#[test] +fn test_field_element_ordering_sensitivity() { + use jolt_field::{Field, Fr}; + use jolt_transcript::{AppendToTranscript, Transcript}; + + let a = Fr::from_u64(42); + let b = Fr::from_u64(99); + + let mut t1 = Blake2bTranscript::<Fr>::new(b"order_test"); + a.append_to_transcript(&mut t1); + b.append_to_transcript(&mut t1); + let c1: Fr = t1.challenge(); + + let mut t2 = Blake2bTranscript::<Fr>::new(b"order_test"); + b.append_to_transcript(&mut t2); + a.append_to_transcript(&mut t2); + let c2: Fr = t2.challenge(); + + assert_ne!( + c1, c2, + "append(a, b) and 
append(b, a) must produce different challenges" + ); +} diff --git a/crates/jolt-transcript/tests/common/mod.rs b/crates/jolt-transcript/tests/common/mod.rs new file mode 100644 index 000000000..08942ffc8 --- /dev/null +++ b/crates/jolt-transcript/tests/common/mod.rs @@ -0,0 +1,260 @@ +//! Common test utilities and standardized test suite for transcript implementations. + +/// Standardized test suite macro for any `Transcript` implementation. +/// +/// This macro generates a comprehensive test suite that verifies the core +/// properties required of any Fiat-Shamir transcript implementation: +/// +/// - Determinism: Same inputs produce same outputs +/// - Domain separation: Different labels produce different transcripts +/// - Challenge uniqueness: Sequential challenges are unique +/// - State mutation: Appending data changes the state +/// - Prover/verifier consistency: Both sides derive identical challenges +#[macro_export] +macro_rules! transcript_tests { + ($transcript_type:ty) => { + use jolt_transcript::Transcript; + use std::collections::HashSet; + + #[test] + fn test_determinism() { + let mut t1 = <$transcript_type>::new(b"determinism_test"); + let mut t2 = <$transcript_type>::new(b"determinism_test"); + + t1.append_bytes(&42u64.to_be_bytes()); + t2.append_bytes(&42u64.to_be_bytes()); + assert_eq!( + t1.state(), + t2.state(), + "States should match after identical operations" + ); + + t1.append_bytes(b"hello world"); + t2.append_bytes(b"hello world"); + assert_eq!(t1.state(), t2.state()); + + assert_eq!( + t1.challenge(), + t2.challenge(), + "Challenges should be identical for identical transcripts" + ); + } + + #[test] + fn test_domain_separation() { + let mut t1 = <$transcript_type>::new(b"protocol_a"); + let mut t2 = <$transcript_type>::new(b"protocol_b"); + + assert_ne!( + t1.state(), + t2.state(), + "Different labels should produce different initial states" + ); + + assert_ne!( + t1.challenge(), + t2.challenge(), + "Different labels should produce 
different challenges" + ); + } + + #[test] + fn test_challenge_uniqueness() { + let mut transcript = <$transcript_type>::new(b"uniqueness_test"); + let mut challenges = HashSet::new(); + + for i in 0..10_000 { + let c = transcript.challenge(); + assert!( + challenges.insert(c), + "Duplicate challenge found at iteration {i}" + ); + } + } + + #[test] + fn test_append_changes_state() { + let mut transcript = <$transcript_type>::new(b"mutation_test"); + let initial_state = *transcript.state(); + + transcript.append_bytes(&1u64.to_be_bytes()); + assert_ne!( + *transcript.state(), + initial_state, + "append should change state" + ); + + let state_after_append = *transcript.state(); + transcript.append_bytes(b"test"); + assert_ne!( + *transcript.state(), + state_after_append, + "append_bytes should change state" + ); + } + + #[test] + fn test_challenge_changes_state() { + let mut transcript = <$transcript_type>::new(b"challenge_mutation"); + let initial_state = *transcript.state(); + + let _ = transcript.challenge(); + assert_ne!( + *transcript.state(), + initial_state, + "challenge should change state" + ); + } + + #[test] + fn test_order_matters() { + let mut t1 = <$transcript_type>::new(b"order_test"); + let mut t2 = <$transcript_type>::new(b"order_test"); + + t1.append_bytes(&1u64.to_be_bytes()); + t1.append_bytes(&2u64.to_be_bytes()); + + t2.append_bytes(&2u64.to_be_bytes()); + t2.append_bytes(&1u64.to_be_bytes()); + + assert_ne!( + t1.state(), + t2.state(), + "Order of operations should affect state" + ); + } + + #[test] + fn test_data_sensitivity() { + let mut t1 = <$transcript_type>::new(b"data_test"); + let mut t2 = <$transcript_type>::new(b"data_test"); + + t1.append_bytes(&0u64.to_be_bytes()); + t2.append_bytes(&1u64.to_be_bytes()); + + assert_ne!( + t1.state(), + t2.state(), + "Different data should produce different states" + ); + } + + #[test] + fn test_empty_bytes() { + let mut t1 = <$transcript_type>::new(b"empty_test"); + let mut t2 = 
<$transcript_type>::new(b"empty_test"); + let initial_state = *t1.state(); + + t1.append_bytes(&[]); + assert_ne!( + *t1.state(), + initial_state, + "Empty bytes should change state" + ); + + t2.append_bytes(&[]); + assert_eq!(t1.state(), t2.state()); + } + + #[test] + fn test_large_data() { + let mut transcript = <$transcript_type>::new(b"large_data_test"); + let large_data = vec![0xABu8; 10_000]; + + transcript.append_bytes(&large_data); + let _ = transcript.challenge(); + } + + #[test] + fn test_prover_verifier_consistency() { + let mut prover = <$transcript_type>::new(b"protocol"); + prover.append_bytes(&42u64.to_be_bytes()); + prover.append_bytes(b"commitment"); + let prover_challenge = prover.challenge(); + + let mut verifier = <$transcript_type>::new(b"protocol"); + verifier.append_bytes(&42u64.to_be_bytes()); + verifier.append_bytes(b"commitment"); + let verifier_challenge = verifier.challenge(); + + assert_eq!( + prover_challenge, verifier_challenge, + "Prover and verifier should derive identical challenges" + ); + } + + #[test] + fn test_clone_independence() { + let mut original = <$transcript_type>::new(b"clone_test"); + original.append_bytes(&1u64.to_be_bytes()); + + let mut cloned = original.clone(); + + cloned.append_bytes(&2u64.to_be_bytes()); + + let original_challenge = original.challenge(); + + let mut fresh = <$transcript_type>::new(b"clone_test"); + fresh.append_bytes(&1u64.to_be_bytes()); + fresh.append_bytes(&2u64.to_be_bytes()); + let fresh_challenge = fresh.challenge(); + + assert_ne!( + original_challenge, fresh_challenge, + "Clone mutation should not affect original" + ); + } + + #[test] + fn test_debug_impl() { + let transcript = <$transcript_type>::new(b"debug_test"); + let debug_str = format!("{:?}", transcript); + + assert!( + debug_str.contains("state"), + "Debug output should contain state" + ); + assert!( + debug_str.contains("n_rounds"), + "Debug output should contain n_rounds" + ); + } + + #[test] + fn test_default_vs_new() { + 
let default_transcript = <$transcript_type>::default(); + let new_transcript = <$transcript_type>::new(b""); + + assert_ne!( + default_transcript.state(), + new_transcript.state(), + "Default and new with empty label should differ" + ); + } + + #[test] + #[should_panic(expected = "label must be at most 32 bytes")] + fn test_label_too_long() { + let long_label: &[u8; 33] = &[b'x'; 33]; + let _ = <$transcript_type>::new(long_label); + } + + #[test] + fn test_max_valid_label() { + let max_label: &[u8; 32] = &[b'L'; 32]; + let transcript = <$transcript_type>::new(max_label); + assert!(!transcript.state().iter().all(|&b| b == 0)); + } + + #[test] + fn test_challenge_vector() { + let mut transcript = <$transcript_type>::new(b"vector_test"); + let challenges = transcript.challenge_vector(5); + + assert_eq!(challenges.len(), 5); + + let unique: HashSet<_> = challenges.iter().collect(); + assert_eq!(unique.len(), 5, "All challenges in vector should be unique"); + } + }; +} diff --git a/crates/jolt-transcript/tests/keccak_tests.rs b/crates/jolt-transcript/tests/keccak_tests.rs new file mode 100644 index 000000000..9058f8da6 --- /dev/null +++ b/crates/jolt-transcript/tests/keccak_tests.rs @@ -0,0 +1,39 @@ +//! Tests for KeccakTranscript implementation. 
+ +mod common; + +use jolt_field::Fr; +use jolt_transcript::KeccakTranscript; +use num_traits::Zero; + +type Kec = KeccakTranscript<Fr>; + +transcript_tests!(Kec); + +#[test] +fn test_keccak_known_vector() { + use jolt_transcript::Transcript; + + let mut transcript = KeccakTranscript::<Fr>::new(b"Jolt"); + transcript.append_bytes(&12345u64.to_be_bytes()); + + let challenge: Fr = transcript.challenge(); + + assert!(!challenge.is_zero()); + + let mut transcript2 = KeccakTranscript::<Fr>::new(b"Jolt"); + transcript2.append_bytes(&12345u64.to_be_bytes()); + assert_eq!(challenge, transcript2.challenge()); +} + +#[test] +fn test_keccak_state_accessor() { + use jolt_transcript::Transcript; + + let transcript = KeccakTranscript::<Fr>::new(b"test"); + let state = transcript.state(); + + assert_eq!(state.len(), 32); + + assert!(!state.iter().all(|&b| b == 0)); +} diff --git a/crates/jolt-transcript/tests/poseidon_tests.rs b/crates/jolt-transcript/tests/poseidon_tests.rs new file mode 100644 index 000000000..f147be2d2 --- /dev/null +++ b/crates/jolt-transcript/tests/poseidon_tests.rs @@ -0,0 +1,10 @@ +//! Tests for PoseidonTranscript implementation. + +mod common; + +use jolt_field::Fr; +use jolt_transcript::PoseidonTranscript; + +type Pos = PoseidonTranscript<Fr>; + +transcript_tests!(Pos);