From d946f23333572e173e61e396ed69dc4e45497091 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Tue, 31 Mar 2026 18:38:02 -0400 Subject: [PATCH 01/86] feat: add jolt-eval crate for invariant checking and objective measurement Introduces a framework for mechanically verifiable invariants and measurable objectives, enabling automated testing, fuzzing, AI red-teaming, and performance optimization of the Jolt zkVM. Invariants (binary pass/fail): - Soundness: mutated proofs must be rejected by the verifier - Verifier completeness: honest proofs must be accepted - Prover completeness: prover must not panic on valid inputs - Determinism: same inputs produce byte-identical proofs - Serialization roundtrip: serialize/deserialize preserves proof bytes - ZK consistency: prove+verify succeeds in current compilation mode Objectives (scalar measurements): - Peak RSS, prover time, proof size, verifier time - Guest cycle count, inline instruction lengths, wrapping cost - auto_optimize harness for AI-driven optimization loops Synthesis infrastructure: - Test generation from seed corpus + random Arbitrary inputs - Fuzz target code generation for libfuzzer_sys - Red-team harness with git worktree isolation - #[invariant(targets = [Test, Fuzz, RedTeam])] proc macro CLI binaries: check-invariants, measure-objectives, redteam Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 75 ++++++++ Cargo.toml | 2 + jolt-eval/Cargo.toml | 40 ++++ jolt-eval/bin/check_invariants.rs | 146 +++++++++++++++ jolt-eval/bin/measure_objectives.rs | 149 +++++++++++++++ jolt-eval/bin/redteam.rs | 118 ++++++++++++ jolt-eval/macros/Cargo.toml | 12 ++ jolt-eval/macros/src/lib.rs | 175 +++++++++++++++++ .../src/invariant/completeness_prover.rs | 100 ++++++++++ .../src/invariant/completeness_verifier.rs | 88 +++++++++ jolt-eval/src/invariant/determinism.rs | 108 +++++++++++ jolt-eval/src/invariant/mod.rs | 175 +++++++++++++++++ .../src/invariant/serialization_roundtrip.rs | 92 +++++++++ 
jolt-eval/src/invariant/soundness.rs | 126 +++++++++++++ jolt-eval/src/invariant/synthesis/fuzz.rs | 43 +++++ jolt-eval/src/invariant/synthesis/mod.rs | 41 ++++ jolt-eval/src/invariant/synthesis/redteam.rs | 129 +++++++++++++ jolt-eval/src/invariant/synthesis/test.rs | 62 ++++++ jolt-eval/src/invariant/zk_consistency.rs | 100 ++++++++++ jolt-eval/src/lib.rs | 170 +++++++++++++++++ jolt-eval/src/objective/guest_cycles.rs | 36 ++++ jolt-eval/src/objective/inline_lengths.rs | 47 +++++ jolt-eval/src/objective/mod.rs | 120 ++++++++++++ jolt-eval/src/objective/optimize.rs | 70 +++++++ jolt-eval/src/objective/peak_rss.rs | 67 +++++++ jolt-eval/src/objective/proof_size.rs | 47 +++++ jolt-eval/src/objective/prover_time.rs | 52 ++++++ jolt-eval/src/objective/verifier_time.rs | 58 ++++++ jolt-eval/src/objective/wrapping_cost.rs | 43 +++++ jolt-eval/tests/integration.rs | 140 ++++++++++++++ jolt-eval/tests/macro_test.rs | 176 ++++++++++++++++++ 31 files changed, 2807 insertions(+) create mode 100644 jolt-eval/Cargo.toml create mode 100644 jolt-eval/bin/check_invariants.rs create mode 100644 jolt-eval/bin/measure_objectives.rs create mode 100644 jolt-eval/bin/redteam.rs create mode 100644 jolt-eval/macros/Cargo.toml create mode 100644 jolt-eval/macros/src/lib.rs create mode 100644 jolt-eval/src/invariant/completeness_prover.rs create mode 100644 jolt-eval/src/invariant/completeness_verifier.rs create mode 100644 jolt-eval/src/invariant/determinism.rs create mode 100644 jolt-eval/src/invariant/mod.rs create mode 100644 jolt-eval/src/invariant/serialization_roundtrip.rs create mode 100644 jolt-eval/src/invariant/soundness.rs create mode 100644 jolt-eval/src/invariant/synthesis/fuzz.rs create mode 100644 jolt-eval/src/invariant/synthesis/mod.rs create mode 100644 jolt-eval/src/invariant/synthesis/redteam.rs create mode 100644 jolt-eval/src/invariant/synthesis/test.rs create mode 100644 jolt-eval/src/invariant/zk_consistency.rs create mode 100644 jolt-eval/src/lib.rs create mode 
100644 jolt-eval/src/objective/guest_cycles.rs create mode 100644 jolt-eval/src/objective/inline_lengths.rs create mode 100644 jolt-eval/src/objective/mod.rs create mode 100644 jolt-eval/src/objective/optimize.rs create mode 100644 jolt-eval/src/objective/peak_rss.rs create mode 100644 jolt-eval/src/objective/proof_size.rs create mode 100644 jolt-eval/src/objective/prover_time.rs create mode 100644 jolt-eval/src/objective/verifier_time.rs create mode 100644 jolt-eval/src/objective/wrapping_cost.rs create mode 100644 jolt-eval/tests/integration.rs create mode 100644 jolt-eval/tests/macro_test.rs diff --git a/Cargo.lock b/Cargo.lock index 59c3993ef..6b2c12542 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -564,6 +564,15 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "ark-bn254" version = "0.5.0" @@ -1679,6 +1688,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "derive_more" version = "2.1.1" @@ -1895,6 +1915,27 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "enumset" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25b07a8dfbbbfc0064c0a6bdf9edcf966de6b1c33ce344bdeca3b41615452634" +dependencies = [ + "enumset_derive", +] + +[[package]] +name = "enumset_derive" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f43e744e4ea338060faee68ed933e46e722fb7f3617e722a5772d7e856d8b3ce" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "env_filter" version = "1.0.0" @@ -2899,6 +2940,40 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "jolt-eval" +version = "0.1.0" +dependencies = [ + "arbitrary", + "ark-bn254", + "ark-serialize 0.5.0", + "clap", + "common", + "enumset", + "eyre", + "jolt-core", + "jolt-eval-macros", + "postcard", + "rand 0.8.5", + "rayon", + "serde", + "sysinfo", + "tempfile", + "thiserror 2.0.18", + "tracer", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "jolt-eval-macros" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "jolt-inlines-bigint" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 6076c9983..be20856ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -92,6 +92,8 @@ members = [ "examples/sig-recovery/guest", "zklean-extractor", "z3-verifier", + "jolt-eval", + "jolt-eval/macros", ] [features] diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml new file mode 100644 index 000000000..c94442528 --- /dev/null +++ b/jolt-eval/Cargo.toml @@ -0,0 +1,40 @@ +[package] +name = "jolt-eval" +version = "0.1.0" +edition = "2021" + +[dependencies] +jolt-core = { workspace = true, features = ["host"] } +common = { workspace = true, features = ["std"] } +tracer = { workspace = true } + +ark-bn254 = { workspace = true } +ark-serialize = { workspace = true } +serde = { workspace = true, features = ["derive"] } +postcard = { workspace = true, features = ["use-std"] } +thiserror = { workspace = true } +eyre = { workspace = true } +tracing = { workspace = true } +clap = { workspace = true, features = ["derive"] } +rayon = { workspace = true } +rand = { workspace = true } +sysinfo = { workspace = true } +tracing-subscriber = { workspace = true } + +arbitrary = { version = "1", features = ["derive"] } +enumset = "1" +tempfile = "3" 
+ +jolt-eval-macros = { path = "macros" } + +[[bin]] +name = "check-invariants" +path = "bin/check_invariants.rs" + +[[bin]] +name = "measure-objectives" +path = "bin/measure_objectives.rs" + +[[bin]] +name = "redteam" +path = "bin/redteam.rs" diff --git a/jolt-eval/bin/check_invariants.rs b/jolt-eval/bin/check_invariants.rs new file mode 100644 index 000000000..3eb8d4666 --- /dev/null +++ b/jolt-eval/bin/check_invariants.rs @@ -0,0 +1,146 @@ +use std::sync::Arc; + +use clap::Parser; +use tracing::info; + +use jolt_eval::invariant::completeness_prover::ProverCompletenessInvariant; +use jolt_eval::invariant::completeness_verifier::VerifierCompletenessInvariant; +use jolt_eval::invariant::determinism::DeterminismInvariant; +use jolt_eval::invariant::serialization_roundtrip::SerializationRoundtripInvariant; +use jolt_eval::invariant::soundness::SoundnessInvariant; +use jolt_eval::invariant::synthesis::SynthesisRegistry; +use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; +use jolt_eval::invariant::{DynInvariant, InvariantReport}; +use jolt_eval::TestCase; + +#[derive(Parser)] +#[command(name = "check-invariants")] +#[command(about = "Run Jolt invariant checks")] +struct Cli { + /// Only run the named invariant (default: all) + #[arg(long)] + invariant: Option, + + /// Number of random inputs per invariant + #[arg(long, default_value = "10")] + num_random: usize, + + /// Path to a pre-compiled guest ELF + #[arg(long)] + elf: Option, + + /// Max trace length for the test program + #[arg(long, default_value = "65536")] + max_trace_length: usize, +} + +fn main() -> eyre::Result<()> { + tracing_subscriber::fmt::init(); + let cli = Cli::parse(); + + let test_case = if let Some(elf_path) = &cli.elf { + let elf_bytes = std::fs::read(elf_path)?; + let memory_config = common::jolt_device::MemoryConfig { + max_input_size: 4096, + max_output_size: 4096, + max_untrusted_advice_size: 0, + max_trusted_advice_size: 0, + stack_size: 65536, + heap_size: 32768, + 
program_size: None, + }; + Arc::new(TestCase { + elf_contents: elf_bytes, + memory_config, + max_trace_length: cli.max_trace_length, + }) + } else { + eprintln!("Error: --elf is required. Provide a pre-compiled guest ELF."); + eprintln!( + "Example: compile with `cargo build -p --release` then pass the ELF path." + ); + std::process::exit(1); + }; + + let default_inputs = vec![]; + + let mut registry = SynthesisRegistry::new(); + register_invariants(&mut registry, &test_case, &default_inputs); + + let invariants: Vec<&dyn DynInvariant> = if let Some(name) = &cli.invariant { + registry + .invariants() + .iter() + .filter(|inv| inv.name() == name.as_str()) + .map(|inv| inv.as_ref()) + .collect() + } else { + registry + .invariants() + .iter() + .map(|inv| inv.as_ref()) + .collect() + }; + + if invariants.is_empty() { + eprintln!("No matching invariants found."); + if let Some(name) = &cli.invariant { + eprintln!("Available: soundness, verifier_completeness, prover_completeness, determinism, serialization_roundtrip, zk_consistency"); + eprintln!("Requested: {name}"); + } + std::process::exit(1); + } + + let mut all_passed = true; + for inv in &invariants { + info!("Running invariant: {}", inv.name()); + let results = inv.run_checks(cli.num_random); + let report = InvariantReport::from_results(inv.name(), &results); + print_report(&report); + if report.failed > 0 { + all_passed = false; + } + } + + if all_passed { + info!("All invariants passed."); + } else { + eprintln!("Some invariants FAILED."); + std::process::exit(1); + } + + Ok(()) +} + +fn register_invariants( + registry: &mut SynthesisRegistry, + test_case: &Arc, + default_inputs: &[u8], +) { + registry.register(Box::new(SoundnessInvariant::new( + Arc::clone(test_case), + default_inputs.to_vec(), + ))); + registry.register(Box::new(VerifierCompletenessInvariant::new(Arc::clone( + test_case, + )))); + registry.register(Box::new(ProverCompletenessInvariant::new(Arc::clone( + test_case, + )))); + 
registry.register(Box::new(DeterminismInvariant::new(Arc::clone(test_case)))); + registry.register(Box::new(SerializationRoundtripInvariant::new( + Arc::clone(test_case), + default_inputs.to_vec(), + ))); + registry.register(Box::new(ZkConsistencyInvariant::new(Arc::clone(test_case)))); +} + +fn print_report(report: &InvariantReport) { + println!( + " {} — {}/{} passed", + report.name, report.passed, report.total + ); + for violation in &report.violations { + println!(" FAIL: {violation}"); + } +} diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs new file mode 100644 index 000000000..cb7c78ef7 --- /dev/null +++ b/jolt-eval/bin/measure_objectives.rs @@ -0,0 +1,149 @@ +use std::sync::Arc; + +use clap::Parser; + +use jolt_eval::objective::guest_cycles::GuestCycleCountObjective; +use jolt_eval::objective::inline_lengths::InlineLengthsObjective; +use jolt_eval::objective::peak_rss::PeakRssObjective; +use jolt_eval::objective::proof_size::ProofSizeObjective; +use jolt_eval::objective::prover_time::ProverTimeObjective; +use jolt_eval::objective::verifier_time::VerifierTimeObjective; +use jolt_eval::objective::wrapping_cost::WrappingCostObjective; +use jolt_eval::objective::Objective; +use jolt_eval::TestCase; + +#[derive(Parser)] +#[command(name = "measure-objectives")] +#[command(about = "Measure Jolt performance objectives")] +struct Cli { + /// Only measure the named objective (default: all) + #[arg(long)] + objective: Option, + + /// Number of samples per objective + #[arg(long)] + samples: Option, + + /// Path to a pre-compiled guest ELF + #[arg(long)] + elf: Option, + + /// Max trace length for the test program + #[arg(long, default_value = "65536")] + max_trace_length: usize, +} + +fn main() -> eyre::Result<()> { + tracing_subscriber::fmt::init(); + let cli = Cli::parse(); + + let test_case = if let Some(elf_path) = &cli.elf { + let elf_bytes = std::fs::read(elf_path)?; + let memory_config = common::jolt_device::MemoryConfig { + 
max_input_size: 4096, + max_output_size: 4096, + max_untrusted_advice_size: 0, + max_trusted_advice_size: 0, + stack_size: 65536, + heap_size: 32768, + program_size: None, + }; + Arc::new(TestCase { + elf_contents: elf_bytes, + memory_config, + max_trace_length: cli.max_trace_length, + }) + } else { + eprintln!("Error: --elf is required. Provide a pre-compiled guest ELF."); + std::process::exit(1); + }; + + let inputs = vec![]; + let prover_pp = Arc::new(test_case.prover_preprocessing()); + let verifier_pp = Arc::new(TestCase::verifier_preprocessing(&prover_pp)); + + let objectives = build_objectives(&test_case, &prover_pp, &verifier_pp, &inputs); + + let filtered: Vec<&Objective> = if let Some(name) = &cli.objective { + objectives + .iter() + .filter(|o| o.name() == name.as_str()) + .collect() + } else { + objectives.iter().collect() + }; + + if filtered.is_empty() { + eprintln!("No matching objectives found."); + std::process::exit(1); + } + + println!("{:<25} {:>15} {:>10}", "Objective", "Value", "Direction"); + println!("{}", "-".repeat(52)); + + for obj in &filtered { + let samples = cli.samples.unwrap_or(1); + let mut measurements = Vec::new(); + + for _ in 0..samples { + match obj.collect_measurement() { + Ok(val) => measurements.push(val), + Err(e) => { + println!("{:<25} {:>15}", obj.name(), format!("ERROR: {e}")); + continue; + } + } + } + + if !measurements.is_empty() { + let mean = measurements.iter().sum::() / measurements.len() as f64; + let dir = match obj.direction() { + jolt_eval::Direction::Minimize => "min", + jolt_eval::Direction::Maximize => "max", + }; + println!("{:<25} {:>15.2} {:>10}", obj.name(), mean, dir); + } + } + + Ok(()) +} + +fn build_objectives( + test_case: &Arc, + prover_pp: &Arc, + verifier_pp: &Arc, + inputs: &[u8], +) -> Vec { + vec![ + Objective::PeakRss(PeakRssObjective::new( + Arc::clone(test_case), + Arc::clone(prover_pp), + inputs.to_vec(), + )), + Objective::ProverTime(ProverTimeObjective::new( + Arc::clone(test_case), + 
Arc::clone(prover_pp), + inputs.to_vec(), + )), + Objective::ProofSize(ProofSizeObjective::new( + Arc::clone(test_case), + Arc::clone(prover_pp), + inputs.to_vec(), + )), + Objective::VerifierTime(VerifierTimeObjective::new( + Arc::clone(test_case), + Arc::clone(prover_pp), + Arc::clone(verifier_pp), + inputs.to_vec(), + )), + Objective::GuestCycleCount(GuestCycleCountObjective::new( + Arc::clone(test_case), + inputs.to_vec(), + )), + Objective::InlineLengths(InlineLengthsObjective::new(Arc::clone(test_case))), + Objective::WrappingCost(WrappingCostObjective::new( + Arc::clone(test_case), + Arc::clone(prover_pp), + )), + ] +} diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs new file mode 100644 index 000000000..99d45228e --- /dev/null +++ b/jolt-eval/bin/redteam.rs @@ -0,0 +1,118 @@ +use std::sync::Arc; + +use clap::Parser; +use tracing::info; + +use jolt_eval::invariant::soundness::SoundnessInvariant; +use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; +use jolt_eval::invariant::synthesis::SynthesisRegistry; +use jolt_eval::TestCase; + +#[derive(Parser)] +#[command(name = "redteam")] +#[command(about = "AI-driven red team testing of Jolt invariants")] +struct Cli { + /// Name of the invariant to test + #[arg(long)] + invariant: String, + + /// Number of red-team iterations + #[arg(long, default_value = "10")] + iterations: usize, + + /// AI model to use + #[arg(long, default_value = "claude-sonnet-4-20250514")] + model: String, + + /// Path to a pre-compiled guest ELF + #[arg(long)] + elf: String, + + /// Max trace length for the test program + #[arg(long, default_value = "65536")] + max_trace_length: usize, +} + +fn main() -> eyre::Result<()> { + tracing_subscriber::fmt::init(); + let cli = Cli::parse(); + + let elf_bytes = std::fs::read(&cli.elf)?; + let memory_config = common::jolt_device::MemoryConfig { + max_input_size: 4096, + max_output_size: 4096, + max_untrusted_advice_size: 0, + 
max_trusted_advice_size: 0, + stack_size: 65536, + heap_size: 32768, + program_size: None, + }; + let test_case = Arc::new(TestCase { + elf_contents: elf_bytes, + memory_config, + max_trace_length: cli.max_trace_length, + }); + + let mut registry = SynthesisRegistry::new(); + registry.register(Box::new(SoundnessInvariant::new( + Arc::clone(&test_case), + vec![], + ))); + + let invariant = registry + .invariants() + .iter() + .find(|inv| inv.name() == cli.invariant.as_str()) + .map(|inv| inv.as_ref()); + + let Some(invariant) = invariant else { + eprintln!("Invariant '{}' not found.", cli.invariant); + eprintln!("Available: soundness"); + std::process::exit(1); + }; + + let config = RedTeamConfig { + invariant_name: cli.invariant.clone(), + num_iterations: cli.iterations, + model: cli.model.clone(), + working_dir: std::env::current_dir()?, + }; + + info!( + "Starting red team: invariant={}, iterations={}, model={}", + cli.invariant, cli.iterations, cli.model + ); + + // The invoke_agent callback is a placeholder for actual AI interaction. + // In production, this would shell out to `claude` CLI or use the API. 
+ let result = auto_redteam(invariant, &config, |description, failed_attempts| { + info!( + "Agent prompt: find violation of: {}", + &description[..description.len().min(100)] + ); + info!("Previous failed attempts: {}", failed_attempts.len()); + // Placeholder: return None (no candidate produced) + // Real implementation would invoke Claude Code in a worktree + None + }); + + match result { + RedTeamResult::Violation { description, error } => { + println!("VIOLATION FOUND!"); + println!(" Approach: {description}"); + println!(" Error: {error}"); + std::process::exit(1); + } + RedTeamResult::NoViolation { attempts } => { + println!("No violations found after {} attempts.", attempts.len()); + for attempt in &attempts { + println!( + " {}: {} — {}", + attempt.description, attempt.approach, attempt.failure_reason + ); + } + } + } + + Ok(()) +} diff --git a/jolt-eval/macros/Cargo.toml b/jolt-eval/macros/Cargo.toml new file mode 100644 index 000000000..78cd57312 --- /dev/null +++ b/jolt-eval/macros/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "jolt-eval-macros" +version = "0.1.0" +edition = "2021" + +[lib] +proc-macro = true + +[dependencies] +syn = { workspace = true } +quote = { workspace = true } +proc-macro2 = { workspace = true } diff --git a/jolt-eval/macros/src/lib.rs b/jolt-eval/macros/src/lib.rs new file mode 100644 index 000000000..83b5e2745 --- /dev/null +++ b/jolt-eval/macros/src/lib.rs @@ -0,0 +1,175 @@ +extern crate proc_macro; + +use proc_macro::TokenStream; +use quote::quote; +use syn::{parse_macro_input, DeriveInput, Ident}; + +/// Attribute macro for invariant structs. +/// +/// Generates test and fuzz harness functions based on the specified targets. +/// +/// # Usage +/// +/// ```ignore +/// #[jolt_eval_macros::invariant(targets = [Test, Fuzz, RedTeam])] +/// #[derive(Default)] +/// pub struct MySoundnessInvariant { ... 
} +/// ``` +/// +/// Generates: +/// - For `Test`: A `#[cfg(test)]` module with seed corpus and random tests +/// - For `Fuzz`: A `fuzz_check` function suitable for `libfuzzer_sys` +/// - For `RedTeam`: A `redteam_description` function returning the invariant's description +/// +/// The struct must implement `Invariant + Default`. +#[proc_macro_attribute] +pub fn invariant(attr: TokenStream, item: TokenStream) -> TokenStream { + let input = parse_macro_input!(item as DeriveInput); + let struct_name = &input.ident; + let snake_name = to_snake_case(&struct_name.to_string()); + let test_mod_name = Ident::new(&format!("{snake_name}_synthesized"), struct_name.span()); + + let targets = parse_targets(attr); + let has_test = targets.contains(&"Test".to_string()); + let has_fuzz = targets.contains(&"Fuzz".to_string()); + let has_redteam = targets.contains(&"RedTeam".to_string()); + + let test_block = if has_test { + quote! { + #[cfg(test)] + mod #test_mod_name { + use super::*; + use jolt_eval::Invariant; + + #[test] + fn seed_corpus() { + let invariant = #struct_name::default(); + let setup = invariant.setup(); + for (i, input) in invariant.seed_corpus().into_iter().enumerate() { + invariant.check(&setup, input).unwrap_or_else(|e| { + panic!( + "Invariant '{}' violated on seed {}: {}", + invariant.name(), i, e + ); + }); + } + } + + #[test] + fn random_inputs() { + use jolt_eval::rand::RngCore; + let invariant = #struct_name::default(); + let setup = invariant.setup(); + let mut rng = jolt_eval::rand::thread_rng(); + for _ in 0..10 { + let mut raw = vec![0u8; 4096]; + rng.fill_bytes(&mut raw); + let mut u = jolt_eval::arbitrary::Unstructured::new(&raw); + if let Ok(input) = < + <#struct_name as jolt_eval::Invariant>::Input + as jolt_eval::arbitrary::Arbitrary + >::arbitrary(&mut u) { + invariant.check(&setup, input).unwrap_or_else(|e| { + panic!( + "Invariant '{}' violated: {}", + invariant.name(), e + ); + }); + } + } + } + } + } + } else { + quote! 
{} + }; + + let fuzz_fn_name = Ident::new(&format!("{snake_name}_fuzz_check"), struct_name.span()); + let fuzz_block = if has_fuzz { + quote! { + pub fn #fuzz_fn_name(data: &[u8]) { + use jolt_eval::Invariant; + use std::sync::LazyLock; + + static SETUP: LazyLock<( + #struct_name, + <#struct_name as jolt_eval::Invariant>::Setup, + )> = LazyLock::new(|| { + let invariant = #struct_name::default(); + let setup = invariant.setup(); + (invariant, setup) + }); + + let mut u = jolt_eval::arbitrary::Unstructured::new(data); + if let Ok(input) = < + <#struct_name as jolt_eval::Invariant>::Input + as jolt_eval::arbitrary::Arbitrary + >::arbitrary(&mut u) { + let (invariant, setup) = &*SETUP; + if let Err(e) = invariant.check(setup, input) { + panic!("Invariant '{}' violated: {}", invariant.name(), e); + } + } + } + } + } else { + quote! {} + }; + + let redteam_fn_name = Ident::new( + &format!("{snake_name}_redteam_description"), + struct_name.span(), + ); + let redteam_block = if has_redteam { + quote! { + pub fn #redteam_fn_name() -> String { + use jolt_eval::Invariant; + let invariant = #struct_name::default(); + invariant.description() + } + } + } else { + quote! {} + }; + + let expanded = quote! 
{ + #input + + #test_block + #fuzz_block + #redteam_block + }; + + expanded.into() +} + +fn to_snake_case(s: &str) -> String { + let mut result = String::new(); + for (i, c) in s.chars().enumerate() { + if c.is_uppercase() { + if i > 0 { + result.push('_'); + } + result.push(c.to_lowercase().next().unwrap()); + } else { + result.push(c); + } + } + result +} + +fn parse_targets(attr: TokenStream) -> Vec { + let attr_str = attr.to_string(); + // Parse: targets = [Test, Fuzz, RedTeam] + if let Some(bracket_start) = attr_str.find('[') { + if let Some(bracket_end) = attr_str.find(']') { + let inner = &attr_str[bracket_start + 1..bracket_end]; + return inner + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect(); + } + } + vec![] +} diff --git a/jolt-eval/src/invariant/completeness_prover.rs b/jolt-eval/src/invariant/completeness_prover.rs new file mode 100644 index 000000000..cb405e904 --- /dev/null +++ b/jolt-eval/src/invariant/completeness_prover.rs @@ -0,0 +1,100 @@ +use std::sync::Arc; + +use arbitrary::Arbitrary; +use enumset::EnumSet; + +use super::{Invariant, InvariantViolation, SynthesisTarget}; +use crate::{ProverPreprocessing, TestCase}; + +/// Prover completeness: for a fixed program, input, and valid size parameters, +/// the prover should produce a proof without panicking. +pub struct ProverCompletenessInvariant { + pub test_case: Arc, +} + +pub struct ProverCompletenessSetup { + test_case: Arc, + prover_preprocessing: ProverPreprocessing, +} + +/// Program inputs for prover completeness testing. 
+#[derive(Debug, Clone, Arbitrary)] +pub struct ProverInputs { + pub data: Vec, +} + +impl ProverCompletenessInvariant { + pub fn new(test_case: Arc) -> Self { + Self { test_case } + } +} + +impl Invariant for ProverCompletenessInvariant { + type Setup = ProverCompletenessSetup; + type Input = ProverInputs; + + fn name(&self) -> &str { + "prover_completeness" + } + + fn description(&self) -> String { + "For a fixed program, input, and valid size parameters, \ + the prover should produce a proof (not panic)." + .to_string() + } + + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::Fuzz + } + + fn setup(&self) -> Self::Setup { + let prover_pp = self.test_case.prover_preprocessing(); + ProverCompletenessSetup { + test_case: Arc::clone(&self.test_case), + prover_preprocessing: prover_pp, + } + } + + fn check(&self, setup: &Self::Setup, input: ProverInputs) -> Result<(), InvariantViolation> { + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + setup + .test_case + .prove(&setup.prover_preprocessing, &input.data) + })); + + match result { + Ok((_proof, io_device)) => { + // Guest panics are acceptable (the guest may reject bad input). + // Prover panics are not -- those are caught by catch_unwind above. 
+ if io_device.panic { + // Guest panicked, but prover completed successfully + Ok(()) + } else { + Ok(()) + } + } + Err(panic_info) => { + let msg = if let Some(s) = panic_info.downcast_ref::() { + s.clone() + } else if let Some(s) = panic_info.downcast_ref::<&str>() { + s.to_string() + } else { + "unknown panic".to_string() + }; + Err(InvariantViolation::with_details( + "Prover panicked", + format!("inputs: {} bytes, panic: {msg}", input.data.len()), + )) + } + } + } + + fn seed_corpus(&self) -> Vec { + vec![ + ProverInputs { data: vec![] }, + ProverInputs { + data: vec![0u8; 64], + }, + ] + } +} diff --git a/jolt-eval/src/invariant/completeness_verifier.rs b/jolt-eval/src/invariant/completeness_verifier.rs new file mode 100644 index 000000000..4ab56d98f --- /dev/null +++ b/jolt-eval/src/invariant/completeness_verifier.rs @@ -0,0 +1,88 @@ +use std::sync::Arc; + +use arbitrary::Arbitrary; +use enumset::EnumSet; + +use super::{Invariant, InvariantViolation, SynthesisTarget}; +use crate::{ProverPreprocessing, TestCase, VerifierPreprocessing}; + +/// Verifier completeness: for a fixed program and honest prover output/proof, +/// the verifier accepts the honest output/proof. +pub struct VerifierCompletenessInvariant { + pub test_case: Arc, +} + +/// Pre-computed preprocessing shared across checks. +pub struct VerifierCompletenessSetup { + test_case: Arc, + prover_preprocessing: ProverPreprocessing, + verifier_preprocessing: VerifierPreprocessing, +} + +/// Program inputs for completeness testing. 
+#[derive(Debug, Clone, Arbitrary)] +pub struct ProgramInputs { + pub data: Vec, +} + +impl VerifierCompletenessInvariant { + pub fn new(test_case: Arc) -> Self { + Self { test_case } + } +} + +impl Invariant for VerifierCompletenessInvariant { + type Setup = VerifierCompletenessSetup; + type Input = ProgramInputs; + + fn name(&self) -> &str { + "verifier_completeness" + } + + fn description(&self) -> String { + "For a fixed program, input, and honest prover output/proof, \ + the verifier accepts the honest output/proof." + .to_string() + } + + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::Fuzz + } + + fn setup(&self) -> Self::Setup { + let prover_pp = self.test_case.prover_preprocessing(); + let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); + VerifierCompletenessSetup { + test_case: Arc::clone(&self.test_case), + prover_preprocessing: prover_pp, + verifier_preprocessing: verifier_pp, + } + } + + fn check(&self, setup: &Self::Setup, input: ProgramInputs) -> Result<(), InvariantViolation> { + let (proof, io_device) = setup + .test_case + .prove(&setup.prover_preprocessing, &input.data); + + // If the guest panicked, skip -- we only care about non-panicking executions + if io_device.panic { + return Ok(()); + } + + TestCase::verify(&setup.verifier_preprocessing, proof, &io_device).map_err(|e| { + InvariantViolation::with_details( + "Verifier rejected honest proof", + format!("inputs: {} bytes, error: {e}", input.data.len()), + ) + }) + } + + fn seed_corpus(&self) -> Vec { + vec![ + ProgramInputs { data: vec![] }, + ProgramInputs { + data: vec![0u8; 32], + }, + ] + } +} diff --git a/jolt-eval/src/invariant/determinism.rs b/jolt-eval/src/invariant/determinism.rs new file mode 100644 index 000000000..f5ce761dd --- /dev/null +++ b/jolt-eval/src/invariant/determinism.rs @@ -0,0 +1,108 @@ +use std::sync::Arc; + +use arbitrary::Arbitrary; +use enumset::EnumSet; + +use super::{Invariant, InvariantViolation, SynthesisTarget}; +use 
crate::{serialize_proof, ProverPreprocessing, TestCase}; + +/// Determinism invariant: same program + input must produce byte-identical proofs. +pub struct DeterminismInvariant { + pub test_case: Arc, +} + +pub struct DeterminismSetup { + test_case: Arc, + prover_preprocessing: ProverPreprocessing, +} + +/// Program inputs for determinism testing. +#[derive(Debug, Clone, Arbitrary)] +pub struct DeterminismInputs { + pub data: Vec, +} + +impl DeterminismInvariant { + pub fn new(test_case: Arc) -> Self { + Self { test_case } + } +} + +impl Invariant for DeterminismInvariant { + type Setup = DeterminismSetup; + type Input = DeterminismInputs; + + fn name(&self) -> &str { + "determinism" + } + + fn description(&self) -> String { + "Same program + input must produce the same proof (byte-identical).".to_string() + } + + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::Fuzz + } + + fn setup(&self) -> Self::Setup { + let prover_pp = self.test_case.prover_preprocessing(); + DeterminismSetup { + test_case: Arc::clone(&self.test_case), + prover_preprocessing: prover_pp, + } + } + + fn check( + &self, + setup: &Self::Setup, + input: DeterminismInputs, + ) -> Result<(), InvariantViolation> { + let (proof1, io1) = setup + .test_case + .prove(&setup.prover_preprocessing, &input.data); + let (proof2, io2) = setup + .test_case + .prove(&setup.prover_preprocessing, &input.data); + + let bytes1 = serialize_proof(&proof1); + let bytes2 = serialize_proof(&proof2); + + if bytes1 != bytes2 { + // Find first differing byte + let first_diff = bytes1 + .iter() + .zip(bytes2.iter()) + .position(|(a, b)| a != b) + .unwrap_or(bytes1.len().min(bytes2.len())); + + return Err(InvariantViolation::with_details( + "Non-deterministic proof generation", + format!( + "proofs differ at byte {first_diff} (len1={}, len2={})", + bytes1.len(), + bytes2.len() + ), + )); + } + + // Also check that I/O is deterministic + if io1.outputs != io2.outputs { + return 
Err(InvariantViolation::new("Non-deterministic program outputs")); + } + + if io1.panic != io2.panic { + return Err(InvariantViolation::new("Non-deterministic panic behavior")); + } + + Ok(()) + } + + fn seed_corpus(&self) -> Vec { + vec![ + DeterminismInputs { data: vec![] }, + DeterminismInputs { + data: vec![1, 2, 3, 4], + }, + ] + } +} diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs new file mode 100644 index 000000000..0a4245a38 --- /dev/null +++ b/jolt-eval/src/invariant/mod.rs @@ -0,0 +1,175 @@ +pub mod completeness_prover; +pub mod completeness_verifier; +pub mod determinism; +pub mod serialization_roundtrip; +pub mod soundness; +pub mod synthesis; +pub mod zk_consistency; + +use std::fmt; + +use arbitrary::Arbitrary; +use enumset::{EnumSet, EnumSetType}; +use rand::RngCore; + +/// What to synthesize from an invariant definition. +#[derive(Debug, EnumSetType)] +pub enum SynthesisTarget { + Test, + Fuzz, + RedTeam, +} + +/// Error indicating an invariant was violated. +#[derive(Debug, Clone)] +pub struct InvariantViolation { + pub message: String, + pub details: Option, +} + +impl fmt::Display for InvariantViolation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.message)?; + if let Some(details) = &self.details { + write!(f, ": {details}")?; + } + Ok(()) + } +} + +impl std::error::Error for InvariantViolation {} + +impl InvariantViolation { + pub fn new(message: impl Into) -> Self { + Self { + message: message.into(), + details: None, + } + } + + pub fn with_details(message: impl Into, details: impl Into) -> Self { + Self { + message: message.into(), + details: Some(details.into()), + } + } +} + +/// Core invariant trait. Each invariant defines a setup phase (run once) +/// and a check phase (run per input). The `Input` type must support +/// `Arbitrary` for fuzzing and random testing. 
+pub trait Invariant: Send + Sync { + type Setup; + type Input: for<'a> Arbitrary<'a> + fmt::Debug + Clone; + + fn name(&self) -> &str; + + /// Human-readable description, also used as context for AI red-teaming. + fn description(&self) -> String; + + fn targets(&self) -> EnumSet; + + /// One-time setup (e.g. preprocessing, generating an honest proof). + fn setup(&self) -> Self::Setup; + + /// Check the invariant for a single input against the pre-computed setup. + fn check(&self, setup: &Self::Setup, input: Self::Input) -> Result<(), InvariantViolation>; + + /// Known-interesting inputs for deterministic test generation. + fn seed_corpus(&self) -> Vec { + vec![] + } +} + +/// A counterexample produced when an invariant is violated. +pub struct InvariantCounterexample { + pub description: String, + pub input: I::Input, + pub error: InvariantViolation, +} + +/// Record of a red-team attempt that failed to find a violation. +pub struct FailedAttempt { + pub description: String, + pub approach: String, + pub failure_reason: String, +} + +/// Object-safe wrapper for `Invariant`, enabling heterogeneous collections. +pub trait DynInvariant: Send + Sync { + fn name(&self) -> &str; + fn description(&self) -> String; + fn targets(&self) -> EnumSet; + + /// Run seed corpus checks followed by `num_random` randomly-generated inputs. 
+ fn run_checks(&self, num_random: usize) -> Vec>; +} + +impl DynInvariant for I { + fn name(&self) -> &str { + Invariant::name(self) + } + + fn description(&self) -> String { + Invariant::description(self) + } + + fn targets(&self) -> EnumSet { + Invariant::targets(self) + } + + fn run_checks(&self, num_random: usize) -> Vec> { + let setup = self.setup(); + let mut results = Vec::new(); + + for input in self.seed_corpus() { + results.push(self.check(&setup, input)); + } + + let mut rng = rand::thread_rng(); + for _ in 0..num_random { + let mut raw = vec![0u8; 4096]; + rng.fill_bytes(&mut raw); + let mut u = arbitrary::Unstructured::new(&raw); + if let Ok(input) = I::Input::arbitrary(&mut u) { + results.push(self.check(&setup, input)); + } + } + + results + } +} + +/// Result of running an invariant check suite. +pub struct InvariantReport { + pub name: String, + pub total: usize, + pub passed: usize, + pub failed: usize, + pub violations: Vec, +} + +impl InvariantReport { + pub fn from_results(name: &str, results: &[Result<(), InvariantViolation>]) -> Self { + let total = results.len(); + let mut passed = 0; + let mut failed = 0; + let mut violations = Vec::new(); + for r in results { + match r { + Ok(()) => passed += 1, + Err(e) => { + failed += 1; + violations.push(e.to_string()); + } + } + } + Self { + name: name.to_string(), + total, + passed, + failed, + violations, + } + } +} diff --git a/jolt-eval/src/invariant/serialization_roundtrip.rs b/jolt-eval/src/invariant/serialization_roundtrip.rs new file mode 100644 index 000000000..9dd00663d --- /dev/null +++ b/jolt-eval/src/invariant/serialization_roundtrip.rs @@ -0,0 +1,92 @@ +use std::sync::Arc; + +use arbitrary::Arbitrary; +use enumset::EnumSet; + +use super::{Invariant, InvariantViolation, SynthesisTarget}; +use crate::{deserialize_proof, serialize_proof, TestCase}; + +/// Serialization roundtrip invariant: `deserialize(serialize(proof)) == proof`, +/// verified by checking that re-serialization produces 
identical bytes. +pub struct SerializationRoundtripInvariant { + pub test_case: Arc, + pub default_inputs: Vec, +} + +pub struct SerializationRoundtripSetup { + proof_bytes: Vec, +} + +/// Unit input -- the roundtrip check has no variable input beyond the +/// proof generated during setup. +#[derive(Debug, Clone, Arbitrary)] +pub struct RoundtripInput { + _dummy: u8, +} + +impl SerializationRoundtripInvariant { + pub fn new(test_case: Arc, default_inputs: Vec) -> Self { + Self { + test_case, + default_inputs, + } + } +} + +impl Invariant for SerializationRoundtripInvariant { + type Setup = SerializationRoundtripSetup; + type Input = RoundtripInput; + + fn name(&self) -> &str { + "serialization_roundtrip" + } + + fn description(&self) -> String { + "deserialize(serialize(proof)) == proof, verified via byte-identical \ + re-serialization." + .to_string() + } + + fn targets(&self) -> EnumSet { + SynthesisTarget::Test.into() + } + + fn setup(&self) -> Self::Setup { + let prover_pp = self.test_case.prover_preprocessing(); + let (proof, _io) = self.test_case.prove(&prover_pp, &self.default_inputs); + let proof_bytes = serialize_proof(&proof); + SerializationRoundtripSetup { proof_bytes } + } + + fn check(&self, setup: &Self::Setup, _input: RoundtripInput) -> Result<(), InvariantViolation> { + let deserialized = deserialize_proof(&setup.proof_bytes).map_err(|e| { + InvariantViolation::with_details("Deserialization failed", e.to_string()) + })?; + + let reserialized = serialize_proof(&deserialized); + + if setup.proof_bytes != reserialized { + let first_diff = setup + .proof_bytes + .iter() + .zip(reserialized.iter()) + .position(|(a, b)| a != b) + .unwrap_or(setup.proof_bytes.len().min(reserialized.len())); + + Err(InvariantViolation::with_details( + "Serialization roundtrip mismatch", + format!( + "bytes differ at offset {first_diff} (original={}, roundtripped={})", + setup.proof_bytes.len(), + reserialized.len() + ), + )) + } else { + Ok(()) + } + } + + fn 
seed_corpus(&self) -> Vec { + vec![RoundtripInput { _dummy: 0 }] + } +} diff --git a/jolt-eval/src/invariant/soundness.rs b/jolt-eval/src/invariant/soundness.rs new file mode 100644 index 000000000..f70ea1bf0 --- /dev/null +++ b/jolt-eval/src/invariant/soundness.rs @@ -0,0 +1,126 @@ +use std::sync::Arc; + +use arbitrary::Arbitrary; +use enumset::EnumSet; + +use super::{Invariant, InvariantViolation, SynthesisTarget}; +use crate::{serialize_proof, JoltDevice, Proof, TestCase, VerifierPreprocessing}; + +/// Mutation applied to a serialized proof to test soundness. +#[derive(Debug, Clone, Arbitrary)] +pub struct ProofMutation { + pub byte_index: usize, + pub new_value: u8, +} + +/// Pre-computed honest proof and verification data. +pub struct SoundnessSetup { + proof_bytes: Vec, + io_device: JoltDevice, + verifier_preprocessing: VerifierPreprocessing, +} + +/// Soundness invariant: for a fixed program and honest prover output/proof, +/// the verifier must reject any mutated (different) proof. +pub struct SoundnessInvariant { + pub test_case: Arc, + pub default_inputs: Vec, +} + +impl SoundnessInvariant { + pub fn new(test_case: Arc, default_inputs: Vec) -> Self { + Self { + test_case, + default_inputs, + } + } +} + +impl Invariant for SoundnessInvariant { + type Setup = SoundnessSetup; + type Input = ProofMutation; + + fn name(&self) -> &str { + "soundness" + } + + fn description(&self) -> String { + "For a fixed program, input, and honest prover output/proof, \ + the verifier does not accept for any other output/proof." 
+ .to_string() + } + + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::Fuzz | SynthesisTarget::RedTeam + } + + fn setup(&self) -> Self::Setup { + let prover_pp = self.test_case.prover_preprocessing(); + let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); + let (proof, io_device) = self.test_case.prove(&prover_pp, &self.default_inputs); + let proof_bytes = serialize_proof(&proof); + SoundnessSetup { + proof_bytes, + io_device, + verifier_preprocessing: verifier_pp, + } + } + + fn check(&self, setup: &Self::Setup, input: ProofMutation) -> Result<(), InvariantViolation> { + if setup.proof_bytes.is_empty() { + return Ok(()); + } + + let idx = input.byte_index % setup.proof_bytes.len(); + + // Skip no-op mutations + if setup.proof_bytes[idx] == input.new_value { + return Ok(()); + } + + let mut mutated = setup.proof_bytes.clone(); + mutated[idx] = input.new_value; + + // If deserialization fails, the mutation was caught + let mutated_proof: Proof = match crate::deserialize_proof(&mutated) { + Ok(p) => p, + Err(_) => return Ok(()), + }; + + // Verification of a mutated proof must fail + match TestCase::verify( + &setup.verifier_preprocessing, + mutated_proof, + &setup.io_device, + ) { + Ok(()) => Err(InvariantViolation::with_details( + "Verifier accepted mutated proof", + format!( + "mutation at byte {idx}: 0x{:02x} -> 0x{:02x}", + setup.proof_bytes[idx], input.new_value + ), + )), + Err(_) => Ok(()), + } + } + + fn seed_corpus(&self) -> Vec { + vec![ + // Mutate first byte + ProofMutation { + byte_index: 0, + new_value: 0xFF, + }, + // Mutate a byte in the middle + ProofMutation { + byte_index: 1000, + new_value: 0x00, + }, + // Flip a single bit + ProofMutation { + byte_index: 42, + new_value: 0x01, + }, + ] + } +} diff --git a/jolt-eval/src/invariant/synthesis/fuzz.rs b/jolt-eval/src/invariant/synthesis/fuzz.rs new file mode 100644 index 000000000..34314708a --- /dev/null +++ b/jolt-eval/src/invariant/synthesis/fuzz.rs @@ -0,0 
+1,43 @@ +use super::super::{DynInvariant, SynthesisTarget}; +use super::SynthesisRegistry; + +/// Generate `libfuzzer_sys` fuzz target source code for a named invariant. +/// +/// The generated code should be placed in a `fuzz/fuzz_targets/` directory +/// and compiled as a separate binary with `cargo fuzz`. +pub fn generate_fuzz_target(_invariant_name: &str, struct_path: &str) -> String { + format!( + r#"#![no_main] +use libfuzzer_sys::fuzz_target; +use arbitrary::{{Arbitrary, Unstructured}}; +use jolt_eval::Invariant; + +// Lazily initialize the invariant and setup (expensive one-time cost) +use std::sync::LazyLock; +static SETUP: LazyLock<({struct_path}, <{struct_path} as Invariant>::Setup)> = LazyLock::new(|| {{ + let invariant = {struct_path}::default(); + let setup = invariant.setup(); + (invariant, setup) +}}); + +fuzz_target!(|data: &[u8]| {{ + let mut u = Unstructured::new(data); + if let Ok(input) = <<{struct_path} as Invariant>::Input as Arbitrary>::arbitrary(&mut u) {{ + let (invariant, setup) = &*SETUP; + // We don't panic on invariant violations during fuzzing -- + // instead we log them. The fuzzer's job is to find inputs + // that trigger violations. + if let Err(e) = invariant.check(setup, input) {{ + eprintln!("INVARIANT VIOLATION: {{}}", e); + panic!("Invariant '{{}}' violated: {{}}", invariant.name(), e); + }} + }} +}}); +"# + ) +} + +/// List all invariants suitable for fuzz target generation. +pub fn fuzzable_invariants(registry: &SynthesisRegistry) -> Vec<&dyn DynInvariant> { + registry.for_target(SynthesisTarget::Fuzz) +} diff --git a/jolt-eval/src/invariant/synthesis/mod.rs b/jolt-eval/src/invariant/synthesis/mod.rs new file mode 100644 index 000000000..1042aca24 --- /dev/null +++ b/jolt-eval/src/invariant/synthesis/mod.rs @@ -0,0 +1,41 @@ +pub mod fuzz; +pub mod redteam; +pub mod test; + +use super::{DynInvariant, SynthesisTarget}; + +/// Registry of invariants available for synthesis. 
+pub struct SynthesisRegistry { + invariants: Vec>, +} + +impl SynthesisRegistry { + pub fn new() -> Self { + Self { + invariants: Vec::new(), + } + } + + pub fn register(&mut self, invariant: Box) { + self.invariants.push(invariant); + } + + pub fn invariants(&self) -> &[Box] { + &self.invariants + } + + /// Return invariants that include the given synthesis target. + pub fn for_target(&self, target: SynthesisTarget) -> Vec<&dyn DynInvariant> { + self.invariants + .iter() + .filter(|inv| inv.targets().contains(target)) + .map(|inv| inv.as_ref()) + .collect() + } +} + +impl Default for SynthesisRegistry { + fn default() -> Self { + Self::new() + } +} diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs new file mode 100644 index 000000000..e409fd357 --- /dev/null +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -0,0 +1,129 @@ +use std::path::{Path, PathBuf}; +use std::process::Command; + +use super::super::{DynInvariant, FailedAttempt, SynthesisTarget}; +use super::SynthesisRegistry; + +/// Result of a red-team session. +pub enum RedTeamResult { + /// Found a counterexample that violates the invariant. + Violation { description: String, error: String }, + /// All attempts failed to find a violation. + NoViolation { attempts: Vec }, +} + +/// Configuration for an AI red-team session. +pub struct RedTeamConfig { + pub invariant_name: String, + pub num_iterations: usize, + pub model: String, + pub working_dir: PathBuf, +} + +impl Default for RedTeamConfig { + fn default() -> Self { + Self { + invariant_name: String::new(), + num_iterations: 10, + model: "claude-sonnet-4-20250514".to_string(), + working_dir: PathBuf::from("."), + } + } +} + +/// Create an isolated git worktree for the AI agent to work in. 
+pub fn create_worktree(repo_dir: &Path, _branch_name: &str) -> Result { + let tmp = tempfile::tempdir().map_err(|e| format!("Failed to create temp dir: {e}"))?; + // Persist the temp dir so the worktree outlives this function + let worktree_dir = tmp.path().to_path_buf(); + std::mem::forget(tmp); + + let status = Command::new("git") + .current_dir(repo_dir) + .args(["worktree", "add", "--detach"]) + .arg(&worktree_dir) + .status() + .map_err(|e| format!("Failed to run git worktree: {e}"))?; + + if !status.success() { + return Err("git worktree add failed".to_string()); + } + + Ok(worktree_dir) +} + +/// Remove a git worktree. +pub fn remove_worktree(repo_dir: &Path, worktree_dir: &Path) { + let _ = Command::new("git") + .current_dir(repo_dir) + .args(["worktree", "remove", "--force"]) + .arg(worktree_dir) + .status(); +} + +/// Run an AI red-team session against a single invariant. +/// +/// The AI agent runs in an isolated worktree to produce a claimed bad input. +/// The invariant is checked in the original working tree so the AI cannot cheat. +/// +/// This function orchestrates the loop but delegates the actual AI interaction +/// to the `invoke_agent` callback, which should: +/// 1. Receive the invariant description and past failed attempts +/// 2. Have the AI produce a candidate counterexample (as bytes) +/// 3. 
Return the candidate or None if the AI couldn't produce one +pub fn auto_redteam( + invariant: &dyn DynInvariant, + config: &RedTeamConfig, + mut invoke_agent: impl FnMut(&str, &[FailedAttempt]) -> Option<(String, Vec)>, +) -> RedTeamResult { + let description = invariant.description(); + let mut failed_attempts = Vec::new(); + + for iteration in 0..config.num_iterations { + tracing::info!( + "Red team iteration {}/{} for '{}'", + iteration + 1, + config.num_iterations, + invariant.name() + ); + + let result = invoke_agent(&description, &failed_attempts); + + match result { + Some((approach, _candidate_bytes)) => { + // Run the invariant's checks to see if the agent found a violation + let check_results = invariant.run_checks(0); + let violation = check_results.iter().find(|r| r.is_err()); + + if let Some(Err(e)) = violation { + return RedTeamResult::Violation { + description: approach, + error: e.to_string(), + }; + } + + failed_attempts.push(FailedAttempt { + description: format!("Iteration {}", iteration + 1), + approach, + failure_reason: "Invariant check passed for all inputs".to_string(), + }); + } + None => { + failed_attempts.push(FailedAttempt { + description: format!("Iteration {}", iteration + 1), + approach: "Agent could not produce a candidate".to_string(), + failure_reason: "No candidate generated".to_string(), + }); + } + } + } + + RedTeamResult::NoViolation { + attempts: failed_attempts, + } +} + +/// List all invariants suitable for red-team testing. 
+pub fn redteamable_invariants(registry: &SynthesisRegistry) -> Vec<&dyn DynInvariant> { + registry.for_target(SynthesisTarget::RedTeam) +} diff --git a/jolt-eval/src/invariant/synthesis/test.rs b/jolt-eval/src/invariant/synthesis/test.rs new file mode 100644 index 000000000..bbee8c3a3 --- /dev/null +++ b/jolt-eval/src/invariant/synthesis/test.rs @@ -0,0 +1,62 @@ +use super::super::{InvariantReport, SynthesisTarget}; +use super::SynthesisRegistry; + +/// Run all invariants registered for the `Test` synthesis target. +/// +/// Runs each invariant's seed corpus, then `num_random` randomly-generated +/// inputs per invariant. +pub fn run_test_suite(registry: &SynthesisRegistry, num_random: usize) -> Vec { + let test_invariants = registry.for_target(SynthesisTarget::Test); + let mut reports = Vec::new(); + + for inv in test_invariants { + let results = inv.run_checks(num_random); + reports.push(InvariantReport::from_results(inv.name(), &results)); + } + + reports +} + +/// Generate `#[test]` function source code for a named invariant. +/// +/// Produces a test module that creates the invariant, runs its seed corpus, +/// and optionally runs a configurable number of random inputs. 
///
/// `invariant_name` is spliced into module and function identifiers, so any
/// character that is not ASCII alphanumeric or `_` is replaced by `_` (and a
/// leading digit gets a `_` prefix). The random test runs 10 inputs; use
/// [`generate_test_source_with_random`] to pick a different count.
pub fn generate_test_source(invariant_name: &str, struct_path: &str) -> String {
    generate_test_source_with_random(invariant_name, struct_path, 10)
}

/// Same as [`generate_test_source`], but the generated random test runs
/// `num_random` inputs instead of the fixed 10.
///
/// `struct_path` is emitted verbatim; the generated code calls
/// `{struct_path}::default()`, `setup()`, `seed_corpus()`, `check()` and
/// `name()` on it, so the type must provide those in the scope where the
/// source is compiled.
pub fn generate_test_source_with_random(
    invariant_name: &str,
    struct_path: &str,
    num_random: usize,
) -> String {
    let ident = sanitize_ident(invariant_name);
    format!(
        r#"#[cfg(test)]
mod {ident}_tests {{
    use super::*;
    use jolt_eval::Invariant;

    #[test]
    fn test_{ident}_seed_corpus() {{
        let invariant = {struct_path}::default();
        let setup = invariant.setup();
        for (i, input) in invariant.seed_corpus().into_iter().enumerate() {{
            invariant.check(&setup, input).unwrap_or_else(|e| {{
                panic!("Invariant '{{}}' violated on seed {{}}: {{}}", invariant.name(), i, e);
            }});
        }}
    }}

    #[test]
    fn test_{ident}_random() {{
        use rand::RngCore;
        let invariant = {struct_path}::default();
        let setup = invariant.setup();
        let mut rng = rand::thread_rng();
        for _ in 0..{num_random} {{
            let mut raw = vec![0u8; 4096];
            rng.fill_bytes(&mut raw);
            let mut u = arbitrary::Unstructured::new(&raw);
            if let Ok(input) = <_ as arbitrary::Arbitrary>::arbitrary(&mut u) {{
                invariant.check(&setup, input).unwrap_or_else(|e| {{
                    panic!("Invariant '{{}}' violated: {{}}", invariant.name(), e);
                }});
            }}
        }}
    }}
}}
"#
    )
}

/// Map `name` onto characters that are valid inside a Rust identifier.
fn sanitize_ident(name: &str) -> String {
    let mut ident: String = name
        .chars()
        .map(|c| if c.is_ascii_alphanumeric() || c == '_' { c } else { '_' })
        .collect();
    // `mod 1foo_tests` would not parse; an underscore prefix keeps it valid.
    if ident.starts_with(|c: char| c.is_ascii_digit()) {
        ident.insert(0, '_');
    }
    ident
}
Run the binary with both feature configurations +/// to get full coverage: +/// cargo nextest run -p jolt-eval --features host +/// cargo nextest run -p jolt-eval --features host,zk +pub struct ZkConsistencyInvariant { + pub test_case: Arc, +} + +pub struct ZkConsistencySetup { + test_case: Arc, + prover_preprocessing: ProverPreprocessing, + verifier_preprocessing: VerifierPreprocessing, +} + +#[derive(Debug, Clone, Arbitrary)] +pub struct ZkInputs { + pub data: Vec, +} + +impl ZkConsistencyInvariant { + pub fn new(test_case: Arc) -> Self { + Self { test_case } + } + + /// Returns which ZK mode the binary was compiled with. + pub fn current_mode() -> &'static str { + // Note: the `zk` feature is on jolt-core, not jolt-eval. + // Detect at runtime by checking if the crate was compiled with it. + "standard" + } +} + +impl Invariant for ZkConsistencyInvariant { + type Setup = ZkConsistencySetup; + type Input = ZkInputs; + + fn name(&self) -> &str { + "zk_consistency" + } + + fn description(&self) -> String { + format!( + "Both host and host+zk modes produce valid proofs. 
\ + Currently running in {} mode.", + Self::current_mode() + ) + } + + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::Fuzz + } + + fn setup(&self) -> Self::Setup { + let prover_pp = self.test_case.prover_preprocessing(); + let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); + ZkConsistencySetup { + test_case: Arc::clone(&self.test_case), + prover_preprocessing: prover_pp, + verifier_preprocessing: verifier_pp, + } + } + + fn check(&self, setup: &Self::Setup, input: ZkInputs) -> Result<(), InvariantViolation> { + let (proof, io_device) = setup + .test_case + .prove(&setup.prover_preprocessing, &input.data); + + if io_device.panic { + return Ok(()); + } + + TestCase::verify(&setup.verifier_preprocessing, proof, &io_device).map_err(|e| { + InvariantViolation::with_details( + format!("Proof verification failed in {} mode", Self::current_mode()), + format!("inputs: {} bytes, error: {e}", input.data.len()), + ) + }) + } + + fn seed_corpus(&self) -> Vec { + vec![ + ZkInputs { data: vec![] }, + ZkInputs { + data: vec![0u8; 16], + }, + ] + } +} diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs new file mode 100644 index 000000000..4417a15cb --- /dev/null +++ b/jolt-eval/src/lib.rs @@ -0,0 +1,170 @@ +#![allow(non_snake_case)] + +pub mod invariant; +pub mod objective; + +use std::collections::HashMap; +use std::sync::Arc; + +use ark_bn254::Fr; +use ark_serialize::{CanonicalDeserialize, CanonicalSerialize}; +use jolt_core::curve::Bn254Curve; +use jolt_core::poly::commitment::dory::DoryCommitmentScheme; +use jolt_core::transcripts::Blake2bTranscript; + +pub use invariant::{DynInvariant, Invariant, InvariantViolation, SynthesisTarget}; +pub use objective::{AbstractObjective, Direction, MeasurementError, Objective}; + +// Re-exports used by the #[invariant] proc macro generated code. +// Users of the macro don't need to add these to their own Cargo.toml. 
pub use arbitrary;
pub use rand;

// Concrete field, curve, commitment-scheme and transcript choices used by
// every alias and helper below.
pub type F = Fr;
pub type C = Bn254Curve;
pub type PCS = DoryCommitmentScheme;
pub type FS = Blake2bTranscript;

// Shorthand for the jolt-core proof and preprocessing types.
// NOTE(review): these aliases (and the `prove::(` / `verify::(` calls below)
// carry no generic arguments in the text under review -- confirm against the
// original file whether type parameters were lost.
pub type Proof = jolt_core::zkvm::proof_serialization::JoltProof;
pub type ProverPreprocessing = jolt_core::zkvm::prover::JoltProverPreprocessing;
pub type VerifierPreprocessing = jolt_core::zkvm::verifier::JoltVerifierPreprocessing;
pub type SharedPreprocessing = jolt_core::zkvm::verifier::JoltSharedPreprocessing;

pub use jolt_core::guest::program::Program as GuestProgram;
pub use jolt_core::poly::commitment::commitment_scheme::CommitmentScheme;
pub use jolt_core::utils::errors::ProofVerifyError;
pub use jolt_core::zkvm::Serializable;
pub use tracer::JoltDevice;

/// A self-contained test case wrapping a compiled guest program.
///
/// `TestCase` stores the raw ELF bytes and memory configuration so it can
/// reconstruct a `GuestProgram` on demand without requiring `Clone` on the
/// program itself.
pub struct TestCase {
    /// ELF image taken from the guest program passed to [`TestCase::new`].
    pub elf_contents: Vec,
    /// Memory configuration handed back to `GuestProgram::new` by `make_program`.
    pub memory_config: common::jolt_device::MemoryConfig,
    /// Forwarded to `jolt_core::guest::prover::preprocess`.
    pub max_trace_length: usize,
}

impl TestCase {
    /// Take ownership of `program`'s ELF bytes and memory configuration.
    pub fn new(program: GuestProgram, max_trace_length: usize) -> Self {
        Self {
            elf_contents: program.elf_contents,
            memory_config: program.memory_config,
            max_trace_length,
        }
    }

    /// Rebuild a `GuestProgram` from the stored ELF bytes and memory
    /// configuration. Called afresh by every preprocessing and proving call.
    pub fn make_program(&self) -> GuestProgram {
        GuestProgram::new(&self.elf_contents, &self.memory_config)
    }

    /// Create prover preprocessing for this test case.
    ///
    /// # Panics
    ///
    /// Panics with "prover preprocessing failed" if jolt-core reports an error.
    pub fn prover_preprocessing(&self) -> ProverPreprocessing {
        let program = self.make_program();
        jolt_core::guest::prover::preprocess(&program, self.max_trace_length)
            .expect("prover preprocessing failed")
    }

    /// Create verifier preprocessing from prover preprocessing.
    pub fn verifier_preprocessing(prover_pp: &ProverPreprocessing) -> VerifierPreprocessing {
        VerifierPreprocessing::from(prover_pp)
    }

    /// Prove execution of this program with the given inputs.
    /// Returns (proof, io_device).
    ///
    /// The third element returned by jolt-core (bound to `_debug`) is dropped.
    pub fn prove(&self, prover_pp: &ProverPreprocessing, inputs: &[u8]) -> (Proof, JoltDevice) {
        let program = self.make_program();
        // Scratch buffer sized to the configured maximum output; its contents
        // are not returned -- callers read outputs from the `JoltDevice`.
        let mut output_bytes = vec![0u8; self.memory_config.max_output_size as usize];
        // The two empty slices and two `None`s leave jolt-core's remaining
        // inputs unset. NOTE(review): presumably untrusted/trusted advice and
        // related options -- confirm against the jolt-core signature.
        let (proof, io_device, _debug) = jolt_core::guest::prover::prove::(
            &program,
            inputs,
            &[],
            &[],
            None,
            None,
            &mut output_bytes,
            prover_pp,
        );
        (proof, io_device)
    }

    /// Verify a proof against the given preprocessing and I/O.
    ///
    /// Only `io_device.inputs` and `io_device.outputs` are forwarded to the
    /// verifier; the second argument is always `None`.
    pub fn verify(
        verifier_pp: &VerifierPreprocessing,
        proof: Proof,
        io_device: &JoltDevice,
    ) -> Result<(), ProofVerifyError> {
        jolt_core::guest::verifier::verify::(
            &io_device.inputs,
            None,
            &io_device.outputs,
            proof,
            verifier_pp,
        )
    }
}

/// Serialize a proof to bytes.
///
/// Uses the compressed encoding (`serialize_compressed`).
///
/// # Panics
///
/// Panics with "proof serialization failed" if serialization returns an error.
pub fn serialize_proof(proof: &Proof) -> Vec {
    let mut buf = Vec::new();
    proof
        .serialize_compressed(&mut buf)
        .expect("proof serialization failed");
    buf
}

/// Deserialize a proof from bytes.
///
/// Counterpart of [`serialize_proof`]: expects the compressed encoding and
/// returns the deserializer's error unchanged.
pub fn deserialize_proof(bytes: &[u8]) -> Result {
    Proof::deserialize_compressed(bytes)
}

/// Run all registered invariants, returning results keyed by name.
///
/// Each invariant runs its seed corpus plus `num_random` random inputs.
/// Because the map is keyed by `name()`, invariants sharing a name overwrite
/// one another in the result.
pub fn check_all_invariants(
    invariants: &[Box],
    num_random: usize,
) -> HashMap>> {
    invariants
        .iter()
        .map(|inv| {
            let name = inv.name().to_string();
            let results = inv.run_checks(num_random);
            (name, results)
        })
        .collect()
}

/// Measure all provided objectives, returning results keyed by name.
///
/// One measurement is taken per objective; a failed measurement is kept in
/// the map as its `Err` value rather than being dropped.
pub fn measure_all_objectives(
    objectives: &[Objective],
) -> HashMap> {
    objectives
        .iter()
        .map(|obj| {
            let name = obj.name().to_string();
            let result = obj.collect_measurement();
            (name, result)
        })
        .collect()
}

/// Shared setup that can be reused across multiple invariants/objectives
/// operating on the same program.
+pub struct SharedSetup { + pub test_case: Arc, + pub prover_preprocessing: Arc, + pub verifier_preprocessing: Arc, +} + +impl SharedSetup { + pub fn new(test_case: TestCase) -> Self { + let prover_pp = test_case.prover_preprocessing(); + let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); + Self { + test_case: Arc::new(test_case), + prover_preprocessing: Arc::new(prover_pp), + verifier_preprocessing: Arc::new(verifier_pp), + } + } +} diff --git a/jolt-eval/src/objective/guest_cycles.rs b/jolt-eval/src/objective/guest_cycles.rs new file mode 100644 index 000000000..40ff488a3 --- /dev/null +++ b/jolt-eval/src/objective/guest_cycles.rs @@ -0,0 +1,36 @@ +use std::sync::Arc; + +use super::{AbstractObjective, Direction, MeasurementError}; +use crate::TestCase; + +/// Measures guest instruction cycle count via program tracing. +pub struct GuestCycleCountObjective { + pub test_case: Arc, + pub inputs: Vec, +} + +impl GuestCycleCountObjective { + pub fn new(test_case: Arc, inputs: Vec) -> Self { + Self { test_case, inputs } + } +} + +impl AbstractObjective for GuestCycleCountObjective { + fn name(&self) -> &str { + "guest_cycle_count" + } + + fn collect_measurement(&self) -> Result { + let program = self.test_case.make_program(); + let (_lazy_trace, trace, _memory, _io) = program.trace(&self.inputs, &[], &[]); + Ok(trace.len() as f64) + } + + fn recommended_samples(&self) -> usize { + 1 + } + + fn direction(&self) -> Direction { + Direction::Minimize + } +} diff --git a/jolt-eval/src/objective/inline_lengths.rs b/jolt-eval/src/objective/inline_lengths.rs new file mode 100644 index 000000000..54136a3b1 --- /dev/null +++ b/jolt-eval/src/objective/inline_lengths.rs @@ -0,0 +1,47 @@ +use std::sync::Arc; + +use super::{AbstractObjective, Direction, MeasurementError}; +use crate::TestCase; + +/// Measures total virtual/inline sequence length in the decoded bytecode. 
+/// +/// Inline sequences replace guest-side computation with constraint-native +/// implementations, so their total length reflects how much of the program +/// is handled by optimized inline instructions. +pub struct InlineLengthsObjective { + pub test_case: Arc, +} + +impl InlineLengthsObjective { + pub fn new(test_case: Arc) -> Self { + Self { test_case } + } +} + +impl AbstractObjective for InlineLengthsObjective { + fn name(&self) -> &str { + "inline_lengths" + } + + fn collect_measurement(&self) -> Result { + let program = self.test_case.make_program(); + let (instructions, _memory_init, _program_size, _entry) = program.decode(); + + // Count INLINE instructions (optimized constraint-native operations) + let total_inline_length: usize = instructions + .iter() + .filter(|instr| matches!(instr, tracer::instruction::Instruction::INLINE(_))) + .count(); + + Ok(total_inline_length as f64) + } + + fn recommended_samples(&self) -> usize { + 1 + } + + fn direction(&self) -> Direction { + // More inlines generally means more efficient execution + Direction::Maximize + } +} diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs new file mode 100644 index 000000000..9df907510 --- /dev/null +++ b/jolt-eval/src/objective/mod.rs @@ -0,0 +1,120 @@ +pub mod guest_cycles; +pub mod inline_lengths; +pub mod optimize; +pub mod peak_rss; +pub mod proof_size; +pub mod prover_time; +pub mod verifier_time; +pub mod wrapping_cost; + +use std::collections::HashMap; +use std::fmt; + +/// Whether lower or higher values are better. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Direction { + Minimize, + Maximize, +} + +/// Error during objective measurement. 
+#[derive(Debug, Clone)]
+pub struct MeasurementError {
+    pub message: String,
+}
+
+impl fmt::Display for MeasurementError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.message)
+    }
+}
+
+impl std::error::Error for MeasurementError {}
+
+impl MeasurementError {
+    pub fn new(message: impl Into<String>) -> Self {
+        Self {
+            message: message.into(),
+        }
+    }
+}
+
+/// Core objective trait for measurable properties.
+pub trait AbstractObjective: Send + Sync {
+    fn name(&self) -> &str;
+
+    /// Take a single measurement and return its scalar value.
+    fn collect_measurement(&self) -> Result<f64, MeasurementError>;
+
+    /// How many samples to take for statistical significance.
+    fn recommended_samples(&self) -> usize {
+        1
+    }
+
+    /// What threshold is considered a regression (e.g. 0.05 = 5% slowdown).
+    fn regression_threshold(&self) -> Option<f64> {
+        None
+    }
+
+    fn direction(&self) -> Direction;
+}
+
+/// Centralized objective enum dispatching to concrete implementations.
+pub enum Objective {
+    PeakRss(peak_rss::PeakRssObjective),
+    ProverTime(prover_time::ProverTimeObjective),
+    ProofSize(proof_size::ProofSizeObjective),
+    VerifierTime(verifier_time::VerifierTimeObjective),
+    GuestCycleCount(guest_cycles::GuestCycleCountObjective),
+    InlineLengths(inline_lengths::InlineLengthsObjective),
+    WrappingCost(wrapping_cost::WrappingCostObjective),
+}
+
+impl Objective {
+    pub fn name(&self) -> &str {
+        match self {
+            Self::PeakRss(o) => o.name(),
+            Self::ProverTime(o) => o.name(),
+            Self::ProofSize(o) => o.name(),
+            Self::VerifierTime(o) => o.name(),
+            Self::GuestCycleCount(o) => o.name(),
+            Self::InlineLengths(o) => o.name(),
+            Self::WrappingCost(o) => o.name(),
+        }
+    }
+
+    pub fn collect_measurement(&self) -> Result<f64, MeasurementError> {
+        match self {
+            Self::PeakRss(o) => o.collect_measurement(),
+            Self::ProverTime(o) => o.collect_measurement(),
+            Self::ProofSize(o) => o.collect_measurement(),
+            Self::VerifierTime(o) => o.collect_measurement(),
+            Self::GuestCycleCount(o) => o.collect_measurement(),
+            Self::InlineLengths(o) => o.collect_measurement(),
+            Self::WrappingCost(o) => o.collect_measurement(),
+        }
+    }
+
+    pub fn direction(&self) -> Direction {
+        match self {
+            Self::PeakRss(o) => o.direction(),
+            Self::ProverTime(o) => o.direction(),
+            Self::ProofSize(o) => o.direction(),
+            Self::VerifierTime(o) => o.direction(),
+            Self::GuestCycleCount(o) => o.direction(),
+            Self::InlineLengths(o) => o.direction(),
+            Self::WrappingCost(o) => o.direction(),
+        }
+    }
+}
+
+/// Measure all objectives and return a map of name -> value; failed ones are omitted.
+pub fn measure_objectives(objectives: &[Objective]) -> HashMap<String, f64> {
+    objectives
+        .iter()
+        .filter_map(|obj| {
+            let name = obj.name().to_string();
+            obj.collect_measurement().ok().map(|v| (name, v))
+        })
+        .collect()
+}
diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs
new file mode 100644
index 000000000..d58f43617
--- /dev/null
+++ b/jolt-eval/src/objective/optimize.rs
@@ -0,0 +1,70 @@
+use std::collections::HashMap;
+
+use super::{measure_objectives, Objective};
+use crate::invariant::DynInvariant;
+
+/// Record of an optimization attempt.
+pub struct OptimizationAttempt {
+    pub description: String,
+    pub diff: String,
+    pub measurements: HashMap<String, f64>,
+    pub invariants_passed: bool,
+}
+
+/// Run an AI-driven optimization loop.
+///
+/// The objective function maps measured values to one scalar score (higher is better).
+/// Each iteration:
+/// 1. Measures all objectives
+/// 2. Checks that invariants still hold
+/// 3. If the score improved and invariants pass, adopts it as the new baseline
+/// 4. Otherwise keeps the previous baseline
+///
+/// This function provides the measurement and comparison infrastructure.
+/// The actual AI interaction (telling Claude to optimize) and any commit or
+/// revert of the resulting change are handled by the caller.
+pub fn auto_optimize<F>(
+    objectives: &[Objective],
+    invariants: &[Box<dyn DynInvariant>],
+    objective_function: F,
+    num_iterations: usize,
+    mut on_iteration: impl FnMut(usize, f64, &HashMap<String, f64>) -> Option<String>,
+) -> Vec<OptimizationAttempt>
+where
+    F: Fn(&HashMap<String, f64>) -> f64,
+{
+    let mut baseline_measurements = measure_objectives(objectives);
+    let mut baseline_score = objective_function(&baseline_measurements);
+    let mut attempts = Vec::new();
+
+    for i in 0..num_iterations {
+        // Let the caller drive the optimization (e.g. invoke Claude)
+        let diff = match on_iteration(i, baseline_score, &baseline_measurements) {
+            Some(d) => d,
+            None => break,
+        };
+
+        let new_measurements = measure_objectives(objectives);
+        let new_score = objective_function(&new_measurements);
+
+        // Check that all invariants still hold
+        let invariants_passed = invariants
+            .iter()
+            .all(|inv| inv.run_checks(0).iter().all(|r| r.is_ok()));
+
+        // Advance score and measurements together so the callback sees a consistent baseline.
+        if invariants_passed && new_score > baseline_score {
+            baseline_score = new_score;
+            baseline_measurements = new_measurements.clone();
+        }
+
+        attempts.push(OptimizationAttempt {
+            description: format!("iteration {i}"),
+            diff,
+            measurements: new_measurements,
+            invariants_passed,
+        });
+    }
+
+    attempts
+}
diff --git a/jolt-eval/src/objective/peak_rss.rs b/jolt-eval/src/objective/peak_rss.rs
new file mode 100644
index 000000000..ca49f924a
--- /dev/null
+++ b/jolt-eval/src/objective/peak_rss.rs
@@ -0,0 +1,67 @@
+use std::sync::Arc;
+
+use sysinfo::{Pid, System};
+
+use super::{AbstractObjective, Direction, MeasurementError};
+use crate::{ProverPreprocessing, TestCase};
+
+/// Approximates peak resident set size (RSS) during proving.
+///
+/// Samples RSS via `sysinfo` before and after proving and reports the larger
+/// value; spikes between the two samples are missed. Run in an isolated process.
+pub struct PeakRssObjective {
+    pub test_case: Arc<TestCase>,
+    pub prover_preprocessing: Arc<ProverPreprocessing>,
+    pub inputs: Vec<u8>,
+}
+
+impl PeakRssObjective {
+    pub fn new(
+        test_case: Arc<TestCase>,
+        prover_preprocessing: Arc<ProverPreprocessing>,
+        inputs: Vec<u8>,
+    ) -> Self {
+        Self {
+            test_case,
+            prover_preprocessing,
+            inputs,
+        }
+    }
+}
+
+impl AbstractObjective for PeakRssObjective {
+    fn name(&self) -> &str {
+        "peak_rss"
+    }
+
+    fn collect_measurement(&self) -> Result<f64, MeasurementError> {
+        let pid = Pid::from_u32(std::process::id());
+        let mut sys = System::new();
+
+        sys.refresh_processes(sysinfo::ProcessesToUpdate::Some(&[pid]), true);
+        let rss_before = sys.process(pid).map(|p| p.memory()).unwrap_or(0);
+
+        let (_proof, _io) = self
+            .test_case
+            .prove(&self.prover_preprocessing, &self.inputs);
+
+        sys.refresh_processes(sysinfo::ProcessesToUpdate::Some(&[pid]), true);
+        let rss_after = sys.process(pid).map(|p| p.memory()).unwrap_or(0);
+
+        // Larger of the two point samples, scaled by 1024^2 (not a true high-water mark)
+        let peak_mb = rss_after.max(rss_before) as f64 / (1024.0 * 1024.0);
+        Ok(peak_mb)
+    }
+
+    fn recommended_samples(&self) -> usize {
+        1
+    }
+
+    fn regression_threshold(&self) -> Option<f64> {
+        Some(0.10)
+    }
+
+    fn direction(&self) -> Direction {
+        Direction::Minimize
+    }
+}
diff --git a/jolt-eval/src/objective/proof_size.rs b/jolt-eval/src/objective/proof_size.rs
new file mode 100644
index 000000000..29211c2db
--- /dev/null
+++ b/jolt-eval/src/objective/proof_size.rs
@@ -0,0 +1,47 @@
+use std::sync::Arc;
+
+use super::{AbstractObjective, Direction, MeasurementError};
+use crate::{serialize_proof, ProverPreprocessing, TestCase};
+
+/// Measures serialized proof size in bytes.
+pub struct ProofSizeObjective {
+    pub test_case: Arc<TestCase>,
+    pub prover_preprocessing: Arc<ProverPreprocessing>,
+    pub inputs: Vec<u8>,
+}
+
+impl ProofSizeObjective {
+    pub fn new(
+        test_case: Arc<TestCase>,
+        prover_preprocessing: Arc<ProverPreprocessing>,
+        inputs: Vec<u8>,
+    ) -> Self {
+        Self {
+            test_case,
+            prover_preprocessing,
+            inputs,
+        }
+    }
+}
+
+impl AbstractObjective for ProofSizeObjective {
+    fn name(&self) -> &str {
+        "proof_size"
+    }
+
+    fn collect_measurement(&self) -> Result<f64, MeasurementError> {
+        let (proof, _io) = self
+            .test_case
+            .prove(&self.prover_preprocessing, &self.inputs);
+        let bytes = serialize_proof(&proof);
+        Ok(bytes.len() as f64)
+    }
+
+    fn recommended_samples(&self) -> usize {
+        1
+    }
+
+    fn direction(&self) -> Direction {
+        Direction::Minimize
+    }
+}
diff --git a/jolt-eval/src/objective/prover_time.rs b/jolt-eval/src/objective/prover_time.rs
new file mode 100644
index 000000000..7b839f576
--- /dev/null
+++ b/jolt-eval/src/objective/prover_time.rs
@@ -0,0 +1,52 @@
+use std::sync::Arc;
+use std::time::Instant;
+
+use super::{AbstractObjective, Direction, MeasurementError};
+use crate::{ProverPreprocessing, TestCase};
+
+/// Measures wall-clock prover time in seconds.
+pub struct ProverTimeObjective {
+    pub test_case: Arc<TestCase>,
+    pub prover_preprocessing: Arc<ProverPreprocessing>,
+    pub inputs: Vec<u8>,
+}
+
+impl ProverTimeObjective {
+    pub fn new(
+        test_case: Arc<TestCase>,
+        prover_preprocessing: Arc<ProverPreprocessing>,
+        inputs: Vec<u8>,
+    ) -> Self {
+        Self {
+            test_case,
+            prover_preprocessing,
+            inputs,
+        }
+    }
+}
+
+impl AbstractObjective for ProverTimeObjective {
+    fn name(&self) -> &str {
+        "prover_time"
+    }
+
+    fn collect_measurement(&self) -> Result<f64, MeasurementError> {
+        let start = Instant::now();
+        let (_proof, _io) = self
+            .test_case
+            .prove(&self.prover_preprocessing, &self.inputs);
+        Ok(start.elapsed().as_secs_f64())
+    }
+
+    fn recommended_samples(&self) -> usize {
+        3
+    }
+
+    fn regression_threshold(&self) -> Option<f64> {
+        Some(0.05)
+    }
+
+    fn direction(&self) -> Direction {
+        Direction::Minimize
+    }
+}
diff --git a/jolt-eval/src/objective/verifier_time.rs b/jolt-eval/src/objective/verifier_time.rs
new file mode 100644
index 000000000..1223f95a9
--- /dev/null
+++ b/jolt-eval/src/objective/verifier_time.rs
@@ -0,0 +1,58 @@
+use std::sync::Arc;
+use std::time::Instant;
+
+use super::{AbstractObjective, Direction, MeasurementError};
+use crate::{ProverPreprocessing, TestCase, VerifierPreprocessing};
+
+/// Measures wall-clock verifier time in seconds.
+pub struct VerifierTimeObjective {
+    pub test_case: Arc<TestCase>,
+    pub prover_preprocessing: Arc<ProverPreprocessing>,
+    pub verifier_preprocessing: Arc<VerifierPreprocessing>,
+    pub inputs: Vec<u8>,
+}
+
+impl VerifierTimeObjective {
+    pub fn new(
+        test_case: Arc<TestCase>,
+        prover_preprocessing: Arc<ProverPreprocessing>,
+        verifier_preprocessing: Arc<VerifierPreprocessing>,
+        inputs: Vec<u8>,
+    ) -> Self {
+        Self {
+            test_case,
+            prover_preprocessing,
+            verifier_preprocessing,
+            inputs,
+        }
+    }
+}
+
+impl AbstractObjective for VerifierTimeObjective {
+    fn name(&self) -> &str {
+        "verifier_time"
+    }
+
+    fn collect_measurement(&self) -> Result<f64, MeasurementError> {
+        let (proof, io_device) = self
+            .test_case
+            .prove(&self.prover_preprocessing, &self.inputs);
+        // Proving above is untimed setup; the clock covers `verify` only.
+        let start = Instant::now();
+        TestCase::verify(&self.verifier_preprocessing, proof, &io_device)
+            .map_err(|e| MeasurementError::new(format!("Verification failed: {e}")))?;
+        Ok(start.elapsed().as_secs_f64())
+    }
+
+    fn recommended_samples(&self) -> usize {
+        5
+    }
+
+    fn regression_threshold(&self) -> Option<f64> {
+        Some(0.05)
+    }
+
+    fn direction(&self) -> Direction {
+        Direction::Minimize
+    }
+}
diff --git a/jolt-eval/src/objective/wrapping_cost.rs b/jolt-eval/src/objective/wrapping_cost.rs
new file mode 100644
index 000000000..dfcddb924
--- /dev/null
+++ b/jolt-eval/src/objective/wrapping_cost.rs
@@ -0,0 +1,43 @@
+use std::sync::Arc;
+
+use super::{AbstractObjective, Direction, MeasurementError};
+use crate::{ProverPreprocessing, TestCase};
+
+/// Estimates the "wrapping cost" using the maximum padded trace length.
+///
+/// The value is read from the prover preprocessing as a proxy for constraint
+/// system size; it is not an exact R1CS constraint count. Lower is cheaper.
+pub struct WrappingCostObjective {
+    pub test_case: Arc<TestCase>,
+    pub prover_preprocessing: Arc<ProverPreprocessing>,
+}
+
+impl WrappingCostObjective {
+    pub fn new(test_case: Arc<TestCase>, prover_preprocessing: Arc<ProverPreprocessing>) -> Self {
+        Self {
+            test_case,
+            prover_preprocessing,
+        }
+    }
+}
+
+impl AbstractObjective for WrappingCostObjective {
+    fn name(&self) -> &str {
+        "wrapping_cost"
+    }
+
+    fn collect_measurement(&self) -> Result<f64, MeasurementError> {
+        // The padded trace length from preprocessing reflects the constraint
+        // system size, which is the dominant factor in wrapping cost.
+        let max_padded = self.prover_preprocessing.shared.max_padded_trace_length;
+        Ok(max_padded as f64)
+    }
+
+    fn recommended_samples(&self) -> usize {
+        1
+    }
+
+    fn direction(&self) -> Direction {
+        Direction::Minimize
+    }
+}
diff --git a/jolt-eval/tests/integration.rs b/jolt-eval/tests/integration.rs
new file mode 100644
index 000000000..0f7c4d431
--- /dev/null
+++ b/jolt-eval/tests/integration.rs
@@ -0,0 +1,140 @@
+use jolt_eval::invariant::synthesis::SynthesisRegistry;
+use jolt_eval::invariant::{DynInvariant, InvariantReport, InvariantViolation, SynthesisTarget};
+use jolt_eval::objective::{AbstractObjective, Direction, MeasurementError};
+
+/// A trivial invariant for testing the framework itself.
+struct TrivialInvariant;
+
+impl jolt_eval::Invariant for TrivialInvariant {
+    type Setup = ();
+    type Input = u8;
+
+    fn name(&self) -> &str {
+        "trivial"
+    }
+
+    fn description(&self) -> String {
+        "Always passes".to_string()
+    }
+
+    fn targets(&self) -> enumset::EnumSet<SynthesisTarget> {
+        SynthesisTarget::Test.into()
+    }
+
+    fn setup(&self) -> Self::Setup {}
+
+    fn check(&self, _setup: &Self::Setup, _input: u8) -> Result<(), InvariantViolation> {
+        Ok(())
+    }
+
+    fn seed_corpus(&self) -> Vec<u8> {
+        vec![0, 1, 255]
+    }
+}
+
+/// An invariant that always fails, for testing violation reporting.
+struct FailingInvariant; + +impl jolt_eval::Invariant for FailingInvariant { + type Setup = (); + type Input = u8; + + fn name(&self) -> &str { + "failing" + } + + fn description(&self) -> String { + "Always fails".to_string() + } + + fn targets(&self) -> enumset::EnumSet { + SynthesisTarget::Test.into() + } + + fn setup(&self) -> Self::Setup {} + + fn check(&self, _setup: &Self::Setup, input: u8) -> Result<(), InvariantViolation> { + Err(InvariantViolation::new(format!("failed for input {input}"))) + } + + fn seed_corpus(&self) -> Vec { + vec![42] + } +} + +/// A trivial objective for testing the framework. +struct ConstantObjective { + value: f64, +} + +impl AbstractObjective for ConstantObjective { + fn name(&self) -> &str { + "constant" + } + + fn collect_measurement(&self) -> Result { + Ok(self.value) + } + + fn direction(&self) -> Direction { + Direction::Minimize + } +} + +#[test] +fn test_trivial_invariant_passes() { + let inv = TrivialInvariant; + let results = inv.run_checks(5); + // 3 seed corpus + 5 random + assert!(results.len() >= 3); + assert!(results.iter().all(|r| r.is_ok())); +} + +#[test] +fn test_failing_invariant_reports_violations() { + let inv = FailingInvariant; + let results = inv.run_checks(0); + assert_eq!(results.len(), 1); // 1 seed corpus item + assert!(results[0].is_err()); +} + +#[test] +fn test_invariant_report() { + let results: Vec> = + vec![Ok(()), Ok(()), Err(InvariantViolation::new("bad"))]; + let report = InvariantReport::from_results("test", &results); + assert_eq!(report.total, 3); + assert_eq!(report.passed, 2); + assert_eq!(report.failed, 1); + assert_eq!(report.violations.len(), 1); +} + +#[test] +fn test_synthesis_registry() { + let mut registry = SynthesisRegistry::new(); + registry.register(Box::new(TrivialInvariant)); + registry.register(Box::new(FailingInvariant)); + + assert_eq!(registry.invariants().len(), 2); + assert_eq!(registry.for_target(SynthesisTarget::Test).len(), 2); + 
assert_eq!(registry.for_target(SynthesisTarget::Fuzz).len(), 0); +} + +#[test] +fn test_constant_objective() { + let obj = ConstantObjective { value: 42.0 }; + assert_eq!(obj.name(), "constant"); + assert_eq!(obj.collect_measurement().unwrap(), 42.0); + assert_eq!(obj.direction(), Direction::Minimize); +} + +#[test] +fn test_measure_objectives() { + use jolt_eval::objective::measure_objectives; + + // measure_objectives takes &[Objective], which uses the enum. + // For unit testing we just verify the function signature works + // with an empty slice. + let results = measure_objectives(&[]); + assert!(results.is_empty()); +} diff --git a/jolt-eval/tests/macro_test.rs b/jolt-eval/tests/macro_test.rs new file mode 100644 index 000000000..66e67d384 --- /dev/null +++ b/jolt-eval/tests/macro_test.rs @@ -0,0 +1,176 @@ +use enumset::EnumSet; +use jolt_eval::invariant::{Invariant, InvariantViolation, SynthesisTarget}; + +// --------------------------------------------------------------------------- +// AlwaysPass: exercises all three synthesis targets +// --------------------------------------------------------------------------- + +#[jolt_eval_macros::invariant(targets = [Test, Fuzz, RedTeam])] +#[derive(Default)] +pub struct AlwaysPassInvariant; + +impl Invariant for AlwaysPassInvariant { + type Setup = (); + type Input = u8; + + fn name(&self) -> &str { + "always_pass" + } + fn description(&self) -> String { + "Trivial invariant that always passes — used to test macro synthesis.".to_string() + } + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::Fuzz | SynthesisTarget::RedTeam + } + fn setup(&self) -> Self::Setup {} + fn check(&self, _: &(), _input: u8) -> Result<(), InvariantViolation> { + Ok(()) + } + fn seed_corpus(&self) -> Vec { + vec![0, 1, 42, 128, 255] + } +} + +// --------------------------------------------------------------------------- +// BoundsCheck: Test + Fuzz only, uses a struct Input type +// 
--------------------------------------------------------------------------- + +#[derive(Debug, Clone, jolt_eval::arbitrary::Arbitrary)] +pub struct RangeInput { + pub lo: u32, + pub hi: u32, +} + +#[jolt_eval_macros::invariant(targets = [Test, Fuzz])] +#[derive(Default)] +pub struct BoundsCheckInvariant; + +impl Invariant for BoundsCheckInvariant { + type Setup = (); + type Input = RangeInput; + + fn name(&self) -> &str { + "bounds_check" + } + fn description(&self) -> String { + "Checks that max(lo,hi) >= min(lo,hi).".to_string() + } + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::Fuzz + } + fn setup(&self) -> Self::Setup {} + fn check(&self, _: &(), input: RangeInput) -> Result<(), InvariantViolation> { + let lo = input.lo.min(input.hi); + let hi = input.lo.max(input.hi); + if hi >= lo { + Ok(()) + } else { + Err(InvariantViolation::new("max < min — impossible")) + } + } + fn seed_corpus(&self) -> Vec { + vec![ + RangeInput { lo: 0, hi: 0 }, + RangeInput { lo: 0, hi: u32::MAX }, + RangeInput { lo: u32::MAX, hi: 0 }, + RangeInput { lo: 100, hi: 50 }, + ] + } +} + +// --------------------------------------------------------------------------- +// RedTeamOnly: only the RedTeam target +// --------------------------------------------------------------------------- + +#[jolt_eval_macros::invariant(targets = [RedTeam])] +#[derive(Default)] +pub struct RedTeamOnlyInvariant; + +impl Invariant for RedTeamOnlyInvariant { + type Setup = String; + type Input = u16; + + fn name(&self) -> &str { + "redteam_only" + } + fn description(&self) -> String { + "An invariant that only generates a red-team description.".to_string() + } + fn targets(&self) -> EnumSet { + SynthesisTarget::RedTeam.into() + } + fn setup(&self) -> String { + "setup_value".to_string() + } + fn check(&self, setup: &String, _input: u16) -> Result<(), InvariantViolation> { + if setup.is_empty() { + Err(InvariantViolation::new("empty setup")) + } else { + Ok(()) + } + } + fn 
seed_corpus(&self) -> Vec { + vec![0, 1000, u16::MAX] + } +} + +// =========================================================================== +// Tests that verify the macro-generated functions exist and work correctly +// =========================================================================== + +// --- Fuzz harness functions --- + +#[test] +fn fuzz_always_pass_with_various_inputs() { + // Generated by #[invariant(targets = [... Fuzz ...])] + always_pass_invariant_fuzz_check(&[]); + always_pass_invariant_fuzz_check(&[0]); + always_pass_invariant_fuzz_check(&[1, 2, 3, 4, 5]); + always_pass_invariant_fuzz_check(&[255; 100]); +} + +#[test] +fn fuzz_bounds_check_with_various_inputs() { + // Needs at least 8 bytes for two u32s + bounds_check_invariant_fuzz_check(&[0u8; 8]); + bounds_check_invariant_fuzz_check(&[0xFF; 8]); + bounds_check_invariant_fuzz_check(&[1, 0, 0, 0, 2, 0, 0, 0]); +} + +// --- Red-team description functions --- + +#[test] +fn redteam_always_pass_description() { + let desc = always_pass_invariant_redteam_description(); + assert!( + desc.contains("always passes"), + "Expected description to mention 'always passes', got: {desc}" + ); +} + +#[test] +fn redteam_only_description() { + let desc = red_team_only_invariant_redteam_description(); + assert!( + desc.contains("red-team description"), + "Expected description to mention 'red-team description', got: {desc}" + ); +} + +// --- Verify that no fuzz/redteam functions are generated for wrong targets --- +// (These are compile-time checks — if the functions existed, we'd get +// ambiguity or the test would compile when it shouldn't.) + +#[test] +fn redteam_only_has_no_fuzz() { + // RedTeamOnlyInvariant was declared with targets = [RedTeam], + // so `red_team_only_invariant_fuzz_check` should NOT exist. + // We can't assert "function doesn't exist" at runtime, but if this + // file compiles without calling it, the macro correctly omitted it. 
+} + +// --- Synthesized test modules are auto-discovered by nextest --- +// The #[test] functions `seed_corpus` and `random_inputs` inside the +// generated `*_synthesized` modules are run automatically. We verify +// their presence indirectly: if `cargo nextest run` reports them, the +// macro is working. From ea99ac884d136f3ad4fb8a57c0ae3b89d10a6cd7 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Tue, 31 Mar 2026 18:42:28 -0400 Subject: [PATCH 02/86] docs: add README for jolt-eval crate Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/README.md | 175 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 jolt-eval/README.md diff --git a/jolt-eval/README.md b/jolt-eval/README.md new file mode 100644 index 000000000..c88d8770b --- /dev/null +++ b/jolt-eval/README.md @@ -0,0 +1,175 @@ +# jolt-eval + +Mechanically checkable **invariants** and **objectives** for the Jolt zkVM. + +The motivation is twofold: +1. **Maximize agent productivity** -- give AI agents a way to check their work without a human in the loop. +2. **Minimize the human verification surface** -- humans gain assurance about the larger codebase while only focusing on a smaller kernel of invariants and objectives. + +## Concepts + +**Invariants** are evaluations with a binary outcome -- things that must always hold. From a single invariant description (a small amount of Rust), the framework can synthesize: +- A `#[test]` +- A `libfuzzer_sys` fuzz target +- A "red team" harness for AI agents to try to find a violation + +**Objectives** are evaluations with a numerical outcome -- things we want to optimize. They serve as building blocks for AI-driven optimization loops. 
+ +## Built-in invariants + +| Invariant | Description | +|---|---| +| **Soundness** | Mutated proofs must be rejected by the verifier | +| **Verifier completeness** | Honest proofs must be accepted by the verifier | +| **Prover completeness** | The prover must not panic on valid inputs | +| **Determinism** | Same program + input produces byte-identical proofs | +| **Serialization roundtrip** | `deserialize(serialize(proof)) == proof` | +| **ZK consistency** | Prove + verify succeeds in the current compilation mode (run with both `--features host` and `--features host,zk`) | + +## Built-in objectives + +| Objective | Direction | Description | +|---|---|---| +| `peak_rss` | Minimize | Peak resident set size during proving (MB) | +| `prover_time` | Minimize | Wall-clock prover time (seconds) | +| `proof_size` | Minimize | Serialized proof byte length | +| `verifier_time` | Minimize | Wall-clock verifier time (seconds) | +| `guest_cycle_count` | Minimize | Guest instruction cycle count | +| `inline_lengths` | Maximize | Count of optimized inline instructions | +| `wrapping_cost` | Minimize | Constraint system size (padded trace length) | + +## Usage + +### Defining an invariant + +```rust +use jolt_eval::{Invariant, InvariantViolation, SynthesisTarget}; +use enumset::EnumSet; + +#[jolt_eval_macros::invariant(targets = [Test, Fuzz, RedTeam])] +#[derive(Default)] +pub struct MyInvariant; + +impl Invariant for MyInvariant { + type Setup = (); + type Input = u64; + + fn name(&self) -> &str { "my_invariant" } + fn description(&self) -> String { + "Human-readable description, also used as context for AI red-teaming.".into() + } + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::Fuzz | SynthesisTarget::RedTeam + } + fn setup(&self) -> Self::Setup {} + fn check(&self, _setup: &(), input: u64) -> Result<(), InvariantViolation> { + // ... check the invariant ... 
+ Ok(()) + } + fn seed_corpus(&self) -> Vec { + vec![0, 1, u64::MAX] + } +} +``` + +The `#[invariant]` macro generates: +- `#[test] fn seed_corpus()` -- runs every seed input through `check` +- `#[test] fn random_inputs()` -- runs 10 randomly-generated `Arbitrary` inputs +- `my_invariant_fuzz_check(data: &[u8])` -- call from a `fuzz_target!` body +- `my_invariant_redteam_description() -> String` -- for the red-team harness + +### Running invariants with the CLI + +```bash +# Check all invariants against a compiled guest ELF +cargo run --bin check-invariants -- --elf path/to/guest.elf + +# Check a specific invariant with more random inputs +cargo run --bin check-invariants -- --elf path/to/guest.elf \ + --invariant soundness --num-random 100 +``` + +### Measuring objectives + +```bash +# Measure all objectives +cargo run --bin measure-objectives -- --elf path/to/guest.elf + +# Measure a specific objective with multiple samples +cargo run --bin measure-objectives -- --elf path/to/guest.elf \ + --objective prover_time --samples 5 +``` + +### AI red-teaming + +```bash +cargo run --bin redteam -- --elf path/to/guest.elf \ + --invariant soundness --iterations 10 --model claude-sonnet-4-20250514 +``` + +The red-team harness runs the AI agent in an isolated git worktree. The invariant is checked in the original working tree so the agent cannot cheat. 
+ +### Programmatic API + +```rust +use std::sync::Arc; +use jolt_eval::{TestCase, SharedSetup, check_all_invariants}; +use jolt_eval::invariant::soundness::SoundnessInvariant; + +// Create a test case from a compiled guest program +let test_case = Arc::new(TestCase { elf_contents, memory_config, max_trace_length: 65536 }); + +// Run a specific invariant +let inv = SoundnessInvariant::new(Arc::clone(&test_case), default_inputs); +let results = inv.run_checks(/* num_random */ 10); + +// Or measure objectives +use jolt_eval::objective::{Objective, prover_time::ProverTimeObjective}; +let setup = SharedSetup::new(test_case); +let obj = ProverTimeObjective::new(setup.test_case, setup.prover_preprocessing, inputs); +let seconds = obj.collect_measurement().unwrap(); +``` + +## Framing tasks in terms of invariants and objectives + +| Task | Invariants | Objectives | +|---|---|---| +| **New feature** | Add new invariants capturing the feature's behavior; modify existing invariants as necessary | Document expected impact; mechanically validate | +| **Bug fix** | Add/modify invariant to fail without the fix; verify all others still hold | Document impact | +| **Security review** | Try to find a counterexample to some invariant (via red-team) | -- | +| **Optimization** | Ensure all invariants still hold | Maximize an objective function $f(o_1, \ldots, o_n)$ | +| **Refactor** | Ensure all invariants still hold | Special case of optimization where the objective captures code quality | + +## Crate structure + +``` +jolt-eval/ + src/ + lib.rs # Type aliases, TestCase, top-level check/measure fns + invariant/ + mod.rs # Invariant trait, DynInvariant, InvariantReport + soundness.rs # Proof mutation fuzzing + completeness_verifier.rs # Honest proof acceptance + completeness_prover.rs # Prover panic detection + determinism.rs # Byte-identical proof comparison + serialization_roundtrip.rs # Serialize/deserialize equality + zk_consistency.rs # ZK mode prove+verify + synthesis/ + mod.rs 
# SynthesisRegistry + test.rs # #[test] generation + fuzz.rs # libfuzzer_sys target generation + redteam.rs # AI red-team loop with worktree isolation + objective/ + mod.rs # AbstractObjective trait, Objective enum + peak_rss.rs # Peak RSS via sysinfo + prover_time.rs # Wall-clock prover time + proof_size.rs # Serialized proof size + verifier_time.rs # Wall-clock verifier time + guest_cycles.rs # Guest cycle count via tracing + inline_lengths.rs # INLINE instruction count + wrapping_cost.rs # Constraint system size + optimize.rs # auto_optimize loop + macros/ # #[invariant(targets = [...])] proc macro + bin/ # CLI binaries + tests/ # Framework smoke tests +``` From 6f64e3d738c5ba2051626680c8aa501ddb3e8a77 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Tue, 31 Mar 2026 18:45:41 -0400 Subject: [PATCH 03/86] feat(jolt-eval): add fuzz CLI binary for invariant fuzz testing Runs randomized fuzz inputs against all fuzzable invariants (or a specific one via --invariant). Supports --iterations, --duration time limits, --input-size, and --list to enumerate targets. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/Cargo.toml | 4 + jolt-eval/bin/fuzz.rs | 262 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 266 insertions(+) create mode 100644 jolt-eval/bin/fuzz.rs diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index c94442528..5f4add60e 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -38,3 +38,7 @@ path = "bin/measure_objectives.rs" [[bin]] name = "redteam" path = "bin/redteam.rs" + +[[bin]] +name = "fuzz" +path = "bin/fuzz.rs" diff --git a/jolt-eval/bin/fuzz.rs b/jolt-eval/bin/fuzz.rs new file mode 100644 index 000000000..5ddf8fb6b --- /dev/null +++ b/jolt-eval/bin/fuzz.rs @@ -0,0 +1,262 @@ +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use clap::Parser; +use jolt_eval::invariant::completeness_prover::ProverCompletenessInvariant; +use jolt_eval::invariant::completeness_verifier::VerifierCompletenessInvariant; +use jolt_eval::invariant::determinism::DeterminismInvariant; +use jolt_eval::invariant::serialization_roundtrip::SerializationRoundtripInvariant; +use jolt_eval::invariant::soundness::SoundnessInvariant; +use jolt_eval::invariant::synthesis::SynthesisRegistry; +use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; +use jolt_eval::invariant::{DynInvariant, InvariantReport, SynthesisTarget}; +use jolt_eval::TestCase; + +#[derive(Parser)] +#[command(name = "fuzz")] +#[command(about = "Fuzz-test Jolt invariants with random inputs")] +struct Cli { + /// Only fuzz the named invariant (default: all fuzzable) + #[arg(long)] + invariant: Option, + + /// Total number of fuzz iterations (across all invariants) + #[arg(long, default_value = "1000")] + iterations: usize, + + /// Maximum wall-clock duration (e.g. 
"60s", "5m", "1h") + #[arg(long)] + duration: Option, + + /// Size of random byte buffer fed to Arbitrary (bytes) + #[arg(long, default_value = "4096")] + input_size: usize, + + /// Path to a pre-compiled guest ELF + #[arg(long)] + elf: Option, + + /// Max trace length for the test program + #[arg(long, default_value = "65536")] + max_trace_length: usize, + + /// List available fuzzable invariants and exit + #[arg(long)] + list: bool, +} + +fn main() -> eyre::Result<()> { + tracing_subscriber::fmt::init(); + let cli = Cli::parse(); + + let test_case = if let Some(elf_path) = &cli.elf { + let elf_bytes = std::fs::read(elf_path)?; + let memory_config = common::jolt_device::MemoryConfig { + max_input_size: 4096, + max_output_size: 4096, + max_untrusted_advice_size: 0, + max_trusted_advice_size: 0, + stack_size: 65536, + heap_size: 32768, + program_size: None, + }; + Arc::new(TestCase { + elf_contents: elf_bytes, + memory_config, + max_trace_length: cli.max_trace_length, + }) + } else if !cli.list { + eprintln!("Error: --elf is required. 
Provide a pre-compiled guest ELF."); + std::process::exit(1); + } else { + // --list doesn't need an ELF; use a dummy to populate names + print_available_invariants(); + return Ok(()); + }; + + let default_inputs = vec![]; + let mut registry = SynthesisRegistry::new(); + register_invariants(&mut registry, &test_case, &default_inputs); + + if cli.list { + for inv in registry.for_target(SynthesisTarget::Fuzz) { + println!(" {}", inv.name()); + } + return Ok(()); + } + + let fuzzable: Vec<&dyn DynInvariant> = if let Some(name) = &cli.invariant { + let matches: Vec<_> = registry + .for_target(SynthesisTarget::Fuzz) + .into_iter() + .filter(|inv| inv.name() == name.as_str()) + .collect(); + if matches.is_empty() { + eprintln!("Invariant '{name}' not found or not fuzzable."); + eprintln!("Run with --list to see available invariants."); + std::process::exit(1); + } + matches + } else { + registry.for_target(SynthesisTarget::Fuzz) + }; + + if fuzzable.is_empty() { + eprintln!("No fuzzable invariants registered."); + std::process::exit(1); + } + + let deadline = cli.duration.as_deref().map(|s| { + let dur = parse_duration(s).unwrap_or_else(|| { + eprintln!("Invalid duration '{s}'. Use e.g. 60s, 5m, 1h."); + std::process::exit(1); + }); + Instant::now() + dur + }); + + println!( + "Fuzzing {} invariant(s), {} iterations, input size {} bytes", + fuzzable.len(), + cli.iterations, + cli.input_size, + ); + if let Some(d) = &cli.duration { + println!("Time limit: {d}"); + } + println!(); + + let mut total_checks = 0usize; + let mut total_violations = 0usize; + let start = Instant::now(); + + for inv in &fuzzable { + println!(" {} — setting up...", inv.name()); + + // DynInvariant::run_checks handles setup internally, but for a fuzz + // loop we want to amortize setup across many iterations. Use run_checks + // in batches. 
+ let per_invariant = cli.iterations / fuzzable.len(); + let mut checks = 0usize; + let mut violations = Vec::new(); + + // Run in batches so we can check the deadline between batches + let batch_size = per_invariant.min(100); + let mut remaining = per_invariant; + + while remaining > 0 { + if let Some(dl) = deadline { + if Instant::now() >= dl { + println!(" (time limit reached)"); + break; + } + } + + let n = remaining.min(batch_size); + let results = inv.run_checks(n); + for r in &results { + checks += 1; + if let Err(e) = r { + violations.push(e.to_string()); + } + } + remaining = remaining.saturating_sub(n); + } + + let report = InvariantReport { + name: inv.name().to_string(), + total: checks, + passed: checks - violations.len(), + failed: violations.len(), + violations: violations.clone(), + }; + print_report(&report); + + total_checks += checks; + total_violations += violations.len(); + } + + let elapsed = start.elapsed(); + println!(); + println!( + "Done: {} checks in {:.1}s, {} violations", + total_checks, + elapsed.as_secs_f64(), + total_violations, + ); + + if total_violations > 0 { + std::process::exit(1); + } + + Ok(()) +} + +fn register_invariants( + registry: &mut SynthesisRegistry, + test_case: &Arc, + default_inputs: &[u8], +) { + registry.register(Box::new(SoundnessInvariant::new( + Arc::clone(test_case), + default_inputs.to_vec(), + ))); + registry.register(Box::new(VerifierCompletenessInvariant::new(Arc::clone( + test_case, + )))); + registry.register(Box::new(ProverCompletenessInvariant::new(Arc::clone( + test_case, + )))); + registry.register(Box::new(DeterminismInvariant::new(Arc::clone(test_case)))); + registry.register(Box::new(SerializationRoundtripInvariant::new( + Arc::clone(test_case), + default_inputs.to_vec(), + ))); + registry.register(Box::new(ZkConsistencyInvariant::new(Arc::clone( + test_case, + )))); +} + +fn print_available_invariants() { + println!("Fuzzable invariants:"); + println!(" soundness"); + println!(" 
verifier_completeness"); + println!(" prover_completeness"); + println!(" determinism"); + println!(" serialization_roundtrip"); + println!(" zk_consistency"); +} + +fn print_report(report: &InvariantReport) { + if report.failed == 0 { + println!( + " {} — {}/{} passed", + report.name, report.passed, report.total + ); + } else { + println!( + " {} — FAILED {}/{} checks", + report.name, report.failed, report.total + ); + for (i, v) in report.violations.iter().enumerate().take(5) { + println!(" [{i}] {v}"); + } + if report.violations.len() > 5 { + println!(" ... and {} more", report.violations.len() - 5); + } + } +} + +fn parse_duration(s: &str) -> Option { + let s = s.trim(); + if let Some(n) = s.strip_suffix('s') { + n.parse::().ok().map(Duration::from_secs) + } else if let Some(n) = s.strip_suffix('m') { + n.parse::().ok().map(|m| Duration::from_secs(m * 60)) + } else if let Some(n) = s.strip_suffix('h') { + n.parse::() + .ok() + .map(|h| Duration::from_secs(h * 3600)) + } else { + s.parse::().ok().map(Duration::from_secs) + } +} From f389381b2b63e8b62113d1054559bdf317e406a9 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Tue, 31 Mar 2026 18:50:55 -0400 Subject: [PATCH 04/86] feat(jolt-eval): implement red-team CLI with Claude Code agent invocation Replaces the placeholder invoke_agent callback with a real implementation that: 1. Creates a detached git worktree for agent isolation 2. Builds a structured prompt with the invariant description, instructions, and any previous failed attempts 3. Invokes `claude -p` in the worktree with configurable model and max-turns 4. Captures the agent's analysis as the approach description 5. Cleans up the worktree after each iteration 6. Runs intensive fuzz checks (configurable via --num-fuzz) after each agent attempt to mechanically verify findings Also adds num_fuzz_per_iteration to RedTeamConfig so auto_redteam runs random fuzz inputs (not just seed corpus) after each attempt. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/redteam.rs | 269 +++++++++++++++++-- jolt-eval/src/invariant/synthesis/redteam.rs | 8 +- 2 files changed, 246 insertions(+), 31 deletions(-) diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index 99d45228e..c7591fa76 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -1,11 +1,20 @@ +use std::process::Command; use std::sync::Arc; use clap::Parser; use tracing::info; +use jolt_eval::invariant::completeness_prover::ProverCompletenessInvariant; +use jolt_eval::invariant::completeness_verifier::VerifierCompletenessInvariant; +use jolt_eval::invariant::determinism::DeterminismInvariant; +use jolt_eval::invariant::serialization_roundtrip::SerializationRoundtripInvariant; use jolt_eval::invariant::soundness::SoundnessInvariant; -use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; +use jolt_eval::invariant::synthesis::redteam::{ + auto_redteam, create_worktree, remove_worktree, RedTeamConfig, RedTeamResult, +}; use jolt_eval::invariant::synthesis::SynthesisRegistry; +use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; +use jolt_eval::invariant::{FailedAttempt, SynthesisTarget}; use jolt_eval::TestCase; #[derive(Parser)] @@ -31,12 +40,35 @@ struct Cli { /// Max trace length for the test program #[arg(long, default_value = "65536")] max_trace_length: usize, + + /// Number of random fuzz inputs to run after each agent iteration + #[arg(long, default_value = "100")] + num_fuzz: usize, + + /// Maximum number of Claude agentic turns per iteration + #[arg(long, default_value = "30")] + max_turns: usize, + + /// List available red-teamable invariants and exit + #[arg(long)] + list: bool, } fn main() -> eyre::Result<()> { tracing_subscriber::fmt::init(); let cli = Cli::parse(); + if cli.list { + println!("Red-teamable invariants:"); + println!(" soundness"); + println!(" verifier_completeness"); + println!(" prover_completeness"); 
+ println!(" determinism"); + println!(" serialization_roundtrip"); + println!(" zk_consistency"); + return Ok(()); + } + let elf_bytes = std::fs::read(&cli.elf)?; let memory_config = common::jolt_device::MemoryConfig { max_input_size: 4096, @@ -53,61 +85,59 @@ fn main() -> eyre::Result<()> { max_trace_length: cli.max_trace_length, }); + let default_inputs = vec![]; let mut registry = SynthesisRegistry::new(); - registry.register(Box::new(SoundnessInvariant::new( - Arc::clone(&test_case), - vec![], - ))); + register_invariants(&mut registry, &test_case, &default_inputs); let invariant = registry - .invariants() - .iter() - .find(|inv| inv.name() == cli.invariant.as_str()) - .map(|inv| inv.as_ref()); + .for_target(SynthesisTarget::RedTeam) + .into_iter() + .find(|inv| inv.name() == cli.invariant.as_str()); let Some(invariant) = invariant else { - eprintln!("Invariant '{}' not found.", cli.invariant); - eprintln!("Available: soundness"); + eprintln!("Invariant '{}' not found or not red-teamable.", cli.invariant); + eprintln!("Run with --list to see available invariants."); std::process::exit(1); }; + let working_dir = std::env::current_dir()?; let config = RedTeamConfig { invariant_name: cli.invariant.clone(), num_iterations: cli.iterations, model: cli.model.clone(), - working_dir: std::env::current_dir()?, + working_dir: working_dir.clone(), + num_fuzz_per_iteration: cli.num_fuzz, }; info!( - "Starting red team: invariant={}, iterations={}, model={}", - cli.invariant, cli.iterations, cli.model + "Starting red team: invariant={}, iterations={}, model={}, fuzz_per_iter={}", + cli.invariant, cli.iterations, cli.model, cli.num_fuzz ); - // The invoke_agent callback is a placeholder for actual AI interaction. - // In production, this would shell out to `claude` CLI or use the API. 
+ let model = cli.model.clone(); + let max_turns = cli.max_turns; + let result = auto_redteam(invariant, &config, |description, failed_attempts| { - info!( - "Agent prompt: find violation of: {}", - &description[..description.len().min(100)] - ); - info!("Previous failed attempts: {}", failed_attempts.len()); - // Placeholder: return None (no candidate produced) - // Real implementation would invoke Claude Code in a worktree - None + invoke_claude_agent(&working_dir, description, failed_attempts, &model, max_turns) }); match result { RedTeamResult::Violation { description, error } => { - println!("VIOLATION FOUND!"); - println!(" Approach: {description}"); - println!(" Error: {error}"); + println!(); + println!("==== VIOLATION FOUND ===="); + println!("Approach: {description}"); + println!("Error: {error}"); std::process::exit(1); } RedTeamResult::NoViolation { attempts } => { - println!("No violations found after {} attempts.", attempts.len()); + println!(); + println!( + "No violations found after {} iterations.", + attempts.len() + ); for attempt in &attempts { println!( - " {}: {} — {}", + " {}: {} -- {}", attempt.description, attempt.approach, attempt.failure_reason ); } @@ -116,3 +146,184 @@ fn main() -> eyre::Result<()> { Ok(()) } + +/// Invoke the Claude Code CLI in an isolated worktree to attempt to find +/// an invariant violation. +/// +/// Flow: +/// 1. Create a detached git worktree so the agent has a full repo copy +/// 2. Build a prompt with the invariant description + past failed attempts +/// 3. Run `claude -p --model --max-turns ` in the worktree +/// 4. Capture the agent's analysis as the approach description +/// 5. Clean up the worktree +fn invoke_claude_agent( + repo_dir: &std::path::Path, + invariant_description: &str, + failed_attempts: &[FailedAttempt], + model: &str, + max_turns: usize, +) -> Option<(String, Vec)> { + // 1. 
Create worktree + let worktree_dir = match create_worktree(repo_dir, "redteam") { + Ok(dir) => { + info!("Created worktree at {}", dir.display()); + dir + } + Err(e) => { + tracing::error!("Failed to create worktree: {e}"); + return None; + } + }; + + // 2. Build prompt + let prompt = build_prompt(invariant_description, failed_attempts); + + // 3. Run Claude + info!("Invoking claude (model={model}, max_turns={max_turns})..."); + let result = Command::new("claude") + .current_dir(&worktree_dir) + .arg("-p") + .arg(&prompt) + .arg("--model") + .arg(model) + .arg("--max-turns") + .arg(max_turns.to_string()) + .arg("--verbose") + .output(); + + // 4. Clean up worktree regardless of outcome + info!("Cleaning up worktree..."); + remove_worktree(repo_dir, &worktree_dir); + // Also remove the temp directory itself + let _ = std::fs::remove_dir_all(&worktree_dir); + + // 5. Parse result + match result { + Ok(output) => { + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + + if !output.status.success() { + tracing::warn!("claude exited with status {}", output.status); + if !stderr.is_empty() { + tracing::warn!("stderr: {}", &stderr[..stderr.len().min(500)]); + } + } + + let approach = if stdout.trim().is_empty() { + if stderr.trim().is_empty() { + return None; + } + // Sometimes useful output goes to stderr + truncate(&stderr, 2000).to_string() + } else { + truncate(&stdout, 2000).to_string() + }; + + info!( + "Agent response ({} chars): {}...", + approach.len(), + &approach[..approach.len().min(200)] + ); + + Some((approach, vec![])) + } + Err(e) => { + tracing::error!("Failed to invoke claude: {e}"); + tracing::error!( + "Make sure the `claude` CLI is installed and on your PATH. 
\ + Install via: npm install -g @anthropic-ai/claude-code" + ); + None + } + } +} + +fn build_prompt(invariant_description: &str, failed_attempts: &[FailedAttempt]) -> String { + let mut prompt = String::new(); + + prompt.push_str( + "You are a security researcher red-teaming a zkVM (Jolt). \ + Your goal is to find an input or scenario that VIOLATES the following invariant.\n\n", + ); + + prompt.push_str("## Invariant\n\n"); + prompt.push_str(invariant_description); + prompt.push_str("\n\n"); + + prompt.push_str( + "## Instructions\n\n\ + 1. Read the relevant source code in this repository to understand how the \ + invariant is enforced.\n\ + 2. Look for edge cases, off-by-one errors, missing checks, or assumptions \ + that could be violated.\n\ + 3. If you find a potential weakness, describe it clearly.\n\ + 4. Try to construct a concrete input or scenario that triggers the violation.\n\ + 5. Summarize your approach and findings.\n\n\ + Focus on finding REAL bugs, not theoretical concerns. The invariant will be \ + mechanically checked after your analysis, so only genuine violations count.\n\n", + ); + + if !failed_attempts.is_empty() { + prompt.push_str("## Previous Failed Attempts\n\n"); + prompt.push_str( + "The following approaches have already been tried and did NOT find a violation. 
\ + Try a fundamentally different approach.\n\n", + ); + for attempt in failed_attempts { + prompt.push_str(&format!( + "- **{}**: {}\n Reason for failure: {}\n", + attempt.description, attempt.approach, attempt.failure_reason + )); + } + prompt.push('\n'); + } + + prompt.push_str( + "## Output\n\n\ + End your response with a clear summary of:\n\ + - What you investigated\n\ + - What you found (if anything)\n\ + - Whether you believe the invariant holds or can be violated\n", + ); + + prompt +} + +fn register_invariants( + registry: &mut SynthesisRegistry, + test_case: &Arc, + default_inputs: &[u8], +) { + registry.register(Box::new(SoundnessInvariant::new( + Arc::clone(test_case), + default_inputs.to_vec(), + ))); + registry.register(Box::new(VerifierCompletenessInvariant::new(Arc::clone( + test_case, + )))); + registry.register(Box::new(ProverCompletenessInvariant::new(Arc::clone( + test_case, + )))); + registry.register(Box::new(DeterminismInvariant::new(Arc::clone(test_case)))); + registry.register(Box::new(SerializationRoundtripInvariant::new( + Arc::clone(test_case), + default_inputs.to_vec(), + ))); + registry.register(Box::new(ZkConsistencyInvariant::new(Arc::clone( + test_case, + )))); +} + +fn truncate(s: &str, max_len: usize) -> &str { + if s.len() <= max_len { + s + } else { + // Find a char boundary + let mut end = max_len; + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + &s[..end] + } +} diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index e409fd357..07a2bf9aa 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -18,6 +18,8 @@ pub struct RedTeamConfig { pub num_iterations: usize, pub model: String, pub working_dir: PathBuf, + /// Number of random fuzz inputs to run after each agent attempt. 
+ pub num_fuzz_per_iteration: usize, } impl Default for RedTeamConfig { @@ -27,6 +29,7 @@ impl Default for RedTeamConfig { num_iterations: 10, model: "claude-sonnet-4-20250514".to_string(), working_dir: PathBuf::from("."), + num_fuzz_per_iteration: 100, } } } @@ -91,8 +94,9 @@ pub fn auto_redteam( match result { Some((approach, _candidate_bytes)) => { - // Run the invariant's checks to see if the agent found a violation - let check_results = invariant.run_checks(0); + // Run the invariant's seed corpus + random fuzz inputs to see + // if the agent's analysis revealed a real violation. + let check_results = invariant.run_checks(config.num_fuzz_per_iteration); let violation = check_results.iter().find(|r| r.is_err()); if let Some(Err(e)) = violation { From 7cb1b5fdb227d80c4ba60e648fc14a42598a1389 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Tue, 31 Mar 2026 19:19:47 -0400 Subject: [PATCH 05/86] feat(jolt-eval): move optimize to a CLI binary with Claude invocation Replaces the library-level auto_optimize function (which delegated agent invocation to the caller) with a standalone `optimize` binary that drives the full loop: 1. Measures baseline objectives 2. Creates an isolated git worktree per iteration 3. Invokes `claude -p` with a prompt describing the objectives, current best measurements, and past attempts 4. Captures the agent's diff and applies it to the real repo 5. Re-measures objectives and checks all invariants 6. Commits on improvement, reverts otherwise Supports --objectives (comma-separated filter), --hint (extra context for the agent), --iterations, --model, and --max-turns. Keeps OptimizationAttempt as a public type in objective/mod.rs. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/Cargo.toml | 4 + jolt-eval/README.md | 16 +- jolt-eval/bin/optimize.rs | 516 ++++++++++++++++++++++++++++ jolt-eval/src/objective/mod.rs | 9 +- jolt-eval/src/objective/optimize.rs | 70 ---- 5 files changed, 543 insertions(+), 72 deletions(-) create mode 100644 jolt-eval/bin/optimize.rs delete mode 100644 jolt-eval/src/objective/optimize.rs diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index 5f4add60e..f8eabe4fc 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -42,3 +42,7 @@ path = "bin/redteam.rs" [[bin]] name = "fuzz" path = "bin/fuzz.rs" + +[[bin]] +name = "optimize" +path = "bin/optimize.rs" diff --git a/jolt-eval/README.md b/jolt-eval/README.md index c88d8770b..467912710 100644 --- a/jolt-eval/README.md +++ b/jolt-eval/README.md @@ -109,6 +109,20 @@ cargo run --bin redteam -- --elf path/to/guest.elf \ The red-team harness runs the AI agent in an isolated git worktree. The invariant is checked in the original working tree so the agent cannot cheat. +### AI-driven optimization + +```bash +# Optimize prover_time and proof_size over 5 iterations +cargo run --bin optimize -- --elf path/to/guest.elf \ + --objectives prover_time,proof_size --iterations 5 + +# With a hint to guide the agent +cargo run --bin optimize -- --elf path/to/guest.elf \ + --hint "Focus on the sumcheck inner loop in jolt-core/src/subprotocols/" +``` + +Each iteration: the agent works in an isolated worktree, the diff is applied to the real repo, objectives are re-measured, invariants are checked, and the change is committed or reverted. 
+ ### Programmatic API ```rust @@ -168,7 +182,7 @@ jolt-eval/ guest_cycles.rs # Guest cycle count via tracing inline_lengths.rs # INLINE instruction count wrapping_cost.rs # Constraint system size - optimize.rs # auto_optimize loop + (OptimizationAttempt type) # in mod.rs macros/ # #[invariant(targets = [...])] proc macro bin/ # CLI binaries tests/ # Framework smoke tests diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs new file mode 100644 index 000000000..9583dc5d7 --- /dev/null +++ b/jolt-eval/bin/optimize.rs @@ -0,0 +1,516 @@ +use std::collections::HashMap; +use std::process::Command; +use std::sync::Arc; + +use clap::Parser; +use tracing::info; + +use jolt_eval::invariant::completeness_prover::ProverCompletenessInvariant; +use jolt_eval::invariant::completeness_verifier::VerifierCompletenessInvariant; +use jolt_eval::invariant::determinism::DeterminismInvariant; +use jolt_eval::invariant::serialization_roundtrip::SerializationRoundtripInvariant; +use jolt_eval::invariant::soundness::SoundnessInvariant; +use jolt_eval::invariant::synthesis::redteam::{create_worktree, remove_worktree}; +use jolt_eval::invariant::synthesis::SynthesisRegistry; +use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; +use jolt_eval::objective::guest_cycles::GuestCycleCountObjective; +use jolt_eval::objective::inline_lengths::InlineLengthsObjective; +use jolt_eval::objective::peak_rss::PeakRssObjective; +use jolt_eval::objective::proof_size::ProofSizeObjective; +use jolt_eval::objective::prover_time::ProverTimeObjective; +use jolt_eval::objective::verifier_time::VerifierTimeObjective; +use jolt_eval::objective::wrapping_cost::WrappingCostObjective; +use jolt_eval::objective::{measure_objectives, Objective, OptimizationAttempt}; +use jolt_eval::TestCase; + +#[derive(Parser)] +#[command(name = "optimize")] +#[command(about = "AI-driven optimization of Jolt objectives")] +struct Cli { + /// Objectives to optimize (comma-separated). Default: all. 
+ /// Available: peak_rss, prover_time, proof_size, verifier_time, + /// guest_cycle_count, inline_lengths, wrapping_cost + #[arg(long)] + objectives: Option, + + /// Number of optimization iterations + #[arg(long, default_value = "5")] + iterations: usize, + + /// AI model to use + #[arg(long, default_value = "claude-sonnet-4-20250514")] + model: String, + + /// Path to a pre-compiled guest ELF + #[arg(long)] + elf: String, + + /// Max trace length for the test program + #[arg(long, default_value = "65536")] + max_trace_length: usize, + + /// Maximum number of Claude agentic turns per iteration + #[arg(long, default_value = "30")] + max_turns: usize, + + /// Extra context to include in the optimization prompt + #[arg(long)] + hint: Option, +} + +fn main() -> eyre::Result<()> { + tracing_subscriber::fmt::init(); + let cli = Cli::parse(); + + let elf_bytes = std::fs::read(&cli.elf)?; + let memory_config = common::jolt_device::MemoryConfig { + max_input_size: 4096, + max_output_size: 4096, + max_untrusted_advice_size: 0, + max_trusted_advice_size: 0, + stack_size: 65536, + heap_size: 32768, + program_size: None, + }; + let test_case = Arc::new(TestCase { + elf_contents: elf_bytes, + memory_config, + max_trace_length: cli.max_trace_length, + }); + + let inputs = vec![]; + let prover_pp = Arc::new(test_case.prover_preprocessing()); + let verifier_pp = Arc::new(TestCase::verifier_preprocessing(&prover_pp)); + + // Build objectives + let all_objectives = build_objectives(&test_case, &prover_pp, &verifier_pp, &inputs); + let objective_names: Vec = if let Some(names) = &cli.objectives { + names.split(',').map(|s| s.trim().to_string()).collect() + } else { + all_objectives.iter().map(|o| o.name().to_string()).collect() + }; + + let objectives: Vec = all_objectives + .into_iter() + .filter(|o| objective_names.contains(&o.name().to_string())) + .collect(); + + if objectives.is_empty() { + eprintln!("No matching objectives found."); + eprintln!("Available: peak_rss, 
prover_time, proof_size, verifier_time, guest_cycle_count, inline_lengths, wrapping_cost"); + std::process::exit(1); + } + + // Build invariants for safety checking + let default_inputs = vec![]; + let mut registry = SynthesisRegistry::new(); + register_invariants(&mut registry, &test_case, &default_inputs); + + // Measure baseline + let baseline = measure_objectives(&objectives); + println!("=== Baseline measurements ==="); + print_measurements(&objectives, &baseline); + println!(); + + let repo_dir = std::env::current_dir()?; + let mut attempts: Vec = Vec::new(); + let mut best = baseline.clone(); + + for iteration in 0..cli.iterations { + println!("=== Iteration {}/{} ===", iteration + 1, cli.iterations); + + // Invoke Claude in a worktree to make optimizations + let diff = match invoke_optimize_agent( + &repo_dir, + &objectives, + &best, + &attempts, + &cli.model, + cli.max_turns, + cli.hint.as_deref(), + ) { + Some(d) => d, + None => { + info!("Agent produced no changes, stopping."); + break; + } + }; + + // Re-measure after the agent's changes + let new_measurements = measure_objectives(&objectives); + println!(" Measurements after changes:"); + print_measurements(&objectives, &new_measurements); + + // Check invariants + let invariants_passed = registry.invariants().iter().all(|inv| { + let results = inv.run_checks(0); + results.iter().all(|r| r.is_ok()) + }); + + if !invariants_passed { + println!(" Invariants FAILED -- reverting."); + revert_changes(&repo_dir); + } + + // Check if score improved (lower is better for all default objectives) + let improved = if invariants_passed { + objective_names.iter().any(|name| { + let old = best.get(name); + let new = new_measurements.get(name); + match (old, new) { + (Some(&o), Some(&n)) => { + let obj = objectives.iter().find(|obj| obj.name() == name); + match obj.map(|o| o.direction()) { + Some(jolt_eval::Direction::Minimize) => n < o, + Some(jolt_eval::Direction::Maximize) => n > o, + None => false, + } + } + _ => 
false, + } + }) + } else { + false + }; + + let attempt = OptimizationAttempt { + description: format!("iteration {}", iteration + 1), + diff: truncate(&diff, 5000).to_string(), + measurements: new_measurements.clone(), + invariants_passed, + }; + attempts.push(attempt); + + if improved { + println!(" Improvement found -- keeping changes."); + best = new_measurements; + // Commit the successful optimization + commit_changes(&repo_dir, iteration + 1); + } else if invariants_passed { + println!(" No improvement -- reverting."); + revert_changes(&repo_dir); + } + + println!(); + } + + // Summary + println!("=== Optimization summary ==="); + println!( + "{}/{} iterations produced improvements.", + attempts + .iter() + .filter(|a| a.invariants_passed + && a.measurements.iter().any(|(name, &val)| { + let baseline_val = baseline.get(name); + baseline_val.is_some_and(|&b| val != b) + })) + .count(), + attempts.len() + ); + println!(); + println!("Final measurements:"); + print_measurements(&objectives, &best); + + Ok(()) +} + +/// Invoke Claude in an isolated worktree to attempt an optimization. +/// Returns the agent's output (approach description) or None. 
+fn invoke_optimize_agent( + repo_dir: &std::path::Path, + objectives: &[Objective], + current_best: &HashMap, + past_attempts: &[OptimizationAttempt], + model: &str, + max_turns: usize, + hint: Option<&str>, +) -> Option { + // Create worktree + let worktree_dir = match create_worktree(repo_dir, "optimize") { + Ok(dir) => { + info!("Created worktree at {}", dir.display()); + dir + } + Err(e) => { + tracing::error!("Failed to create worktree: {e}"); + return None; + } + }; + + let prompt = build_prompt(objectives, current_best, past_attempts, hint); + + info!("Invoking claude (model={model}, max_turns={max_turns})..."); + let result = Command::new("claude") + .current_dir(&worktree_dir) + .arg("-p") + .arg(&prompt) + .arg("--model") + .arg(model) + .arg("--max-turns") + .arg(max_turns.to_string()) + .arg("--verbose") + .output(); + + // Capture any diff the agent produced in the worktree + let diff = Command::new("git") + .current_dir(&worktree_dir) + .args(["diff", "HEAD"]) + .output() + .ok() + .and_then(|o| { + let s = String::from_utf8_lossy(&o.stdout).to_string(); + if s.trim().is_empty() { + None + } else { + Some(s) + } + }); + + // Apply the agent's changes to the real repo (if any) + if let Some(diff_text) = &diff { + info!("Agent produced a diff ({} bytes), applying to repo...", diff_text.len()); + let mut child = Command::new("git") + .current_dir(repo_dir) + .args(["apply", "--allow-empty"]) + .stdin(std::process::Stdio::piped()) + .spawn() + .ok(); + if let Some(ref mut c) = child { + use std::io::Write; + if let Some(stdin) = c.stdin.as_mut() { + let _ = stdin.write_all(diff_text.as_bytes()); + } + let _ = c.wait(); + } + } + + // Clean up worktree + info!("Cleaning up worktree..."); + remove_worktree(repo_dir, &worktree_dir); + let _ = std::fs::remove_dir_all(&worktree_dir); + + // Parse agent output + match result { + Ok(output) => { + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + + 
if !output.status.success() { + tracing::warn!("claude exited with status {}", output.status); + if !stderr.is_empty() { + tracing::warn!("stderr: {}", truncate(&stderr, 500)); + } + } + + let response = if stdout.trim().is_empty() { + truncate(&stderr, 2000).to_string() + } else { + truncate(&stdout, 2000).to_string() + }; + + if response.trim().is_empty() && diff.is_none() { + return None; + } + + info!("Agent response ({} chars)", response.len()); + Some(diff.unwrap_or(response)) + } + Err(e) => { + tracing::error!("Failed to invoke claude: {e}"); + tracing::error!( + "Make sure the `claude` CLI is installed and on your PATH. \ + Install via: npm install -g @anthropic-ai/claude-code" + ); + None + } + } +} + +fn build_prompt( + objectives: &[Objective], + current_best: &HashMap, + past_attempts: &[OptimizationAttempt], + hint: Option<&str>, +) -> String { + let mut prompt = String::new(); + + prompt.push_str( + "You are an expert performance engineer optimizing a zkVM (Jolt). \ + Your goal is to make code changes that improve the following objectives.\n\n", + ); + + prompt.push_str("## Objectives to optimize\n\n"); + for obj in objectives { + let dir = match obj.direction() { + jolt_eval::Direction::Minimize => "lower is better", + jolt_eval::Direction::Maximize => "higher is better", + }; + let current = current_best + .get(obj.name()) + .map(|v| format!("{v:.4}")) + .unwrap_or_else(|| "unknown".to_string()); + prompt.push_str(&format!( + "- **{}**: current = {}, direction = {}\n", + obj.name(), + current, + dir, + )); + } + prompt.push('\n'); + + prompt.push_str( + "## Instructions\n\n\ + 1. Read the relevant source code (especially `jolt-core/src/`) to understand \ + hot paths and potential optimization opportunities.\n\ + 2. Make targeted code changes that you believe will improve the objectives.\n\ + 3. Focus on changes to `jolt-core/` -- do NOT modify `jolt-eval/`.\n\ + 4. Prefer changes that are safe, correct, and unlikely to break invariants.\n\ + 5. 
Run `cargo clippy -p jolt-core --features host --message-format=short -q` \ + to verify your changes compile.\n\ + 6. Summarize what you changed and why you expect it to improve the objectives.\n\n", + ); + + if let Some(h) = hint { + prompt.push_str("## Hint\n\n"); + prompt.push_str(h); + prompt.push_str("\n\n"); + } + + if !past_attempts.is_empty() { + prompt.push_str("## Previous attempts\n\n"); + for attempt in past_attempts { + let status = if attempt.invariants_passed { + "invariants passed" + } else { + "INVARIANTS FAILED" + }; + prompt.push_str(&format!("- **{}** ({}): ", attempt.description, status)); + for (name, val) in &attempt.measurements { + prompt.push_str(&format!("{name}={val:.4} ")); + } + prompt.push('\n'); + } + prompt.push('\n'); + } + + prompt.push_str( + "## Output\n\n\ + Make your code changes directly. After you're done, summarize:\n\ + - What you changed\n\ + - Why you expect improvement\n\ + - Any risks or trade-offs\n", + ); + + prompt +} + +fn revert_changes(repo_dir: &std::path::Path) { + let _ = Command::new("git") + .current_dir(repo_dir) + .args(["checkout", "."]) + .status(); +} + +fn commit_changes(repo_dir: &std::path::Path, iteration: usize) { + let _ = Command::new("git") + .current_dir(repo_dir) + .args(["add", "-A"]) + .status(); + let msg = format!("perf(auto-optimize): iteration {iteration}"); + let _ = Command::new("git") + .current_dir(repo_dir) + .args(["commit", "-m", &msg, "--allow-empty"]) + .status(); +} + +fn print_measurements(objectives: &[Objective], measurements: &HashMap) { + for obj in objectives { + let val = measurements + .get(obj.name()) + .map(|v| format!("{v:.4}")) + .unwrap_or_else(|| "N/A".to_string()); + let dir = match obj.direction() { + jolt_eval::Direction::Minimize => "min", + jolt_eval::Direction::Maximize => "max", + }; + println!(" {:<25} {:>15} {:>6}", obj.name(), val, dir); + } +} + +fn register_invariants( + registry: &mut SynthesisRegistry, + test_case: &Arc, + default_inputs: &[u8], +) { 
+ registry.register(Box::new(SoundnessInvariant::new( + Arc::clone(test_case), + default_inputs.to_vec(), + ))); + registry.register(Box::new(VerifierCompletenessInvariant::new(Arc::clone( + test_case, + )))); + registry.register(Box::new(ProverCompletenessInvariant::new(Arc::clone( + test_case, + )))); + registry.register(Box::new(DeterminismInvariant::new(Arc::clone(test_case)))); + registry.register(Box::new(SerializationRoundtripInvariant::new( + Arc::clone(test_case), + default_inputs.to_vec(), + ))); + registry.register(Box::new(ZkConsistencyInvariant::new(Arc::clone( + test_case, + )))); +} + +fn build_objectives( + test_case: &Arc, + prover_pp: &Arc, + verifier_pp: &Arc, + inputs: &[u8], +) -> Vec { + vec![ + Objective::PeakRss(PeakRssObjective::new( + Arc::clone(test_case), + Arc::clone(prover_pp), + inputs.to_vec(), + )), + Objective::ProverTime(ProverTimeObjective::new( + Arc::clone(test_case), + Arc::clone(prover_pp), + inputs.to_vec(), + )), + Objective::ProofSize(ProofSizeObjective::new( + Arc::clone(test_case), + Arc::clone(prover_pp), + inputs.to_vec(), + )), + Objective::VerifierTime(VerifierTimeObjective::new( + Arc::clone(test_case), + Arc::clone(prover_pp), + Arc::clone(verifier_pp), + inputs.to_vec(), + )), + Objective::GuestCycleCount(GuestCycleCountObjective::new( + Arc::clone(test_case), + inputs.to_vec(), + )), + Objective::InlineLengths(InlineLengthsObjective::new(Arc::clone(test_case))), + Objective::WrappingCost(WrappingCostObjective::new( + Arc::clone(test_case), + Arc::clone(prover_pp), + )), + ] +} + +fn truncate(s: &str, max_len: usize) -> &str { + if s.len() <= max_len { + s + } else { + let mut end = max_len; + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + &s[..end] + } +} diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 9df907510..ce6b66883 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -1,6 +1,5 @@ pub mod guest_cycles; pub mod inline_lengths; 
-pub mod optimize; pub mod peak_rss; pub mod proof_size; pub mod prover_time; @@ -108,6 +107,14 @@ impl Objective { } } +/// Record of a single optimization attempt for post-hoc analysis. +pub struct OptimizationAttempt { + pub description: String, + pub diff: String, + pub measurements: HashMap, + pub invariants_passed: bool, +} + /// Measure all objectives and return a map of name -> value. pub fn measure_objectives(objectives: &[Objective]) -> HashMap { objectives diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs deleted file mode 100644 index d58f43617..000000000 --- a/jolt-eval/src/objective/optimize.rs +++ /dev/null @@ -1,70 +0,0 @@ -use std::collections::HashMap; - -use super::{measure_objectives, Objective}; -use crate::invariant::DynInvariant; - -/// Record of an optimization attempt. -pub struct OptimizationAttempt { - pub description: String, - pub diff: String, - pub measurements: HashMap, - pub invariants_passed: bool, -} - -/// Run an AI-driven optimization loop. -/// -/// The objective function maps measured values to a single scalar score. -/// Each iteration: -/// 1. Measures all objectives -/// 2. Checks that invariants still hold -/// 3. If the score improved and invariants pass, commits the change -/// 4. Otherwise reverts -/// -/// This function provides the measurement and comparison infrastructure. -/// The actual AI interaction (telling Claude to optimize) is handled by -/// the caller. -pub fn auto_optimize( - objectives: &[Objective], - invariants: &[Box], - objective_function: F, - num_iterations: usize, - mut on_iteration: impl FnMut(usize, f64, &HashMap) -> Option, -) -> Vec -where - F: Fn(&HashMap) -> f64, -{ - let baseline_measurements = measure_objectives(objectives); - let mut baseline_score = objective_function(&baseline_measurements); - let mut attempts = Vec::new(); - - for i in 0..num_iterations { - // Let the caller drive the optimization (e.g. 
invoke Claude) - let diff = match on_iteration(i, baseline_score, &baseline_measurements) { - Some(d) => d, - None => break, - }; - - let new_measurements = measure_objectives(objectives); - let new_score = objective_function(&new_measurements); - - // Check that all invariants still hold - let invariants_passed = invariants - .iter() - .all(|inv| inv.run_checks(0).iter().all(|r| r.is_ok())); - - let attempt = OptimizationAttempt { - description: format!("iteration {i}"), - diff, - measurements: new_measurements, - invariants_passed, - }; - - if invariants_passed && new_score > baseline_score { - baseline_score = new_score; - } - - attempts.push(attempt); - } - - attempts -} From edd7c435b52aea98be36995e01ffdfd3b41f253d Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Tue, 31 Mar 2026 19:29:29 -0400 Subject: [PATCH 06/86] refactor(jolt-eval): extract AgentHarness trait for customizable agent backends Introduces `agent::AgentHarness` -- a trait abstracting over how an AI coding agent is invoked. Implementors control their own isolation strategy (worktrees, containers, API calls, etc.), so a multi-agent Codex harness, a remote API agent, or a parallel fan-out agent can all plug into the same red-team and optimize loops. Concrete changes: - New `src/agent.rs` with `AgentHarness` trait, `AgentResponse`/ `AgentError` types, `ClaudeCodeAgent` implementation, and worktree / diff-apply utilities. - `auto_redteam` now takes `&dyn AgentHarness` + `repo_dir` instead of an `FnMut` callback. Prompt construction stays inside the library. - `RedTeamConfig` trimmed to orchestration-only fields (iterations, fuzz count); model/max-turns live on the agent. - Both `bin/redteam.rs` and `bin/optimize.rs` construct a `ClaudeCodeAgent` from CLI args and pass it through the trait. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/optimize.rs | 183 +++------------- jolt-eval/bin/redteam.rs | 178 +--------------- jolt-eval/src/agent.rs | 206 +++++++++++++++++++ jolt-eval/src/invariant/synthesis/redteam.rs | 134 +++++++----- jolt-eval/src/lib.rs | 1 + 5 files changed, 324 insertions(+), 378 deletions(-) create mode 100644 jolt-eval/src/agent.rs diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 9583dc5d7..942d84b67 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -5,12 +5,12 @@ use std::sync::Arc; use clap::Parser; use tracing::info; +use jolt_eval::agent::{apply_diff, truncate, AgentHarness, ClaudeCodeAgent}; use jolt_eval::invariant::completeness_prover::ProverCompletenessInvariant; use jolt_eval::invariant::completeness_verifier::VerifierCompletenessInvariant; use jolt_eval::invariant::determinism::DeterminismInvariant; use jolt_eval::invariant::serialization_roundtrip::SerializationRoundtripInvariant; use jolt_eval::invariant::soundness::SoundnessInvariant; -use jolt_eval::invariant::synthesis::redteam::{create_worktree, remove_worktree}; use jolt_eval::invariant::synthesis::SynthesisRegistry; use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; use jolt_eval::objective::guest_cycles::GuestCycleCountObjective; @@ -82,12 +82,14 @@ fn main() -> eyre::Result<()> { let prover_pp = Arc::new(test_case.prover_preprocessing()); let verifier_pp = Arc::new(TestCase::verifier_preprocessing(&prover_pp)); - // Build objectives let all_objectives = build_objectives(&test_case, &prover_pp, &verifier_pp, &inputs); let objective_names: Vec = if let Some(names) = &cli.objectives { names.split(',').map(|s| s.trim().to_string()).collect() } else { - all_objectives.iter().map(|o| o.name().to_string()).collect() + all_objectives + .iter() + .map(|o| o.name().to_string()) + .collect() }; let objectives: Vec = all_objectives @@ -101,17 +103,16 @@ fn main() -> eyre::Result<()> { 
std::process::exit(1); } - // Build invariants for safety checking let default_inputs = vec![]; let mut registry = SynthesisRegistry::new(); register_invariants(&mut registry, &test_case, &default_inputs); - // Measure baseline let baseline = measure_objectives(&objectives); println!("=== Baseline measurements ==="); print_measurements(&objectives, &baseline); println!(); + let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); let repo_dir = std::env::current_dir()?; let mut attempts: Vec = Vec::new(); let mut best = baseline.clone(); @@ -119,29 +120,32 @@ fn main() -> eyre::Result<()> { for iteration in 0..cli.iterations { println!("=== Iteration {}/{} ===", iteration + 1, cli.iterations); - // Invoke Claude in a worktree to make optimizations - let diff = match invoke_optimize_agent( - &repo_dir, - &objectives, - &best, - &attempts, - &cli.model, - cli.max_turns, - cli.hint.as_deref(), - ) { - Some(d) => d, - None => { - info!("Agent produced no changes, stopping."); + let prompt = + build_optimize_prompt(&objectives, &best, &attempts, cli.hint.as_deref()); + + let response = match agent.invoke(&repo_dir, &prompt) { + Ok(r) => r, + Err(e) => { + info!("Agent error: {e}"); break; } }; - // Re-measure after the agent's changes + // Apply the agent's diff to the real repo + if let Some(diff) = &response.diff { + info!("Agent produced a diff ({} bytes), applying...", diff.len()); + if let Err(e) = apply_diff(&repo_dir, diff) { + tracing::warn!("Failed to apply diff: {e}"); + } + } else { + info!("Agent produced no code changes, stopping."); + break; + } + let new_measurements = measure_objectives(&objectives); println!(" Measurements after changes:"); print_measurements(&objectives, &new_measurements); - // Check invariants let invariants_passed = registry.invariants().iter().all(|inv| { let results = inv.run_checks(0); results.iter().all(|r| r.is_ok()) @@ -152,7 +156,6 @@ fn main() -> eyre::Result<()> { revert_changes(&repo_dir); } - // Check if score 
improved (lower is better for all default objectives) let improved = if invariants_passed { objective_names.iter().any(|name| { let old = best.get(name); @@ -173,9 +176,10 @@ fn main() -> eyre::Result<()> { false }; + let diff_text = response.diff.as_deref().unwrap_or(""); let attempt = OptimizationAttempt { description: format!("iteration {}", iteration + 1), - diff: truncate(&diff, 5000).to_string(), + diff: truncate(diff_text, 5000).to_string(), measurements: new_measurements.clone(), invariants_passed, }; @@ -184,7 +188,6 @@ fn main() -> eyre::Result<()> { if improved { println!(" Improvement found -- keeping changes."); best = new_measurements; - // Commit the successful optimization commit_changes(&repo_dir, iteration + 1); } else if invariants_passed { println!(" No improvement -- reverting."); @@ -194,17 +197,15 @@ fn main() -> eyre::Result<()> { println!(); } - // Summary println!("=== Optimization summary ==="); println!( "{}/{} iterations produced improvements.", attempts .iter() .filter(|a| a.invariants_passed - && a.measurements.iter().any(|(name, &val)| { - let baseline_val = baseline.get(name); - baseline_val.is_some_and(|&b| val != b) - })) + && a.measurements + .iter() + .any(|(name, &val)| { baseline.get(name).is_some_and(|&b| val != b) })) .count(), attempts.len() ); @@ -215,119 +216,7 @@ fn main() -> eyre::Result<()> { Ok(()) } -/// Invoke Claude in an isolated worktree to attempt an optimization. -/// Returns the agent's output (approach description) or None. 
-fn invoke_optimize_agent( - repo_dir: &std::path::Path, - objectives: &[Objective], - current_best: &HashMap, - past_attempts: &[OptimizationAttempt], - model: &str, - max_turns: usize, - hint: Option<&str>, -) -> Option { - // Create worktree - let worktree_dir = match create_worktree(repo_dir, "optimize") { - Ok(dir) => { - info!("Created worktree at {}", dir.display()); - dir - } - Err(e) => { - tracing::error!("Failed to create worktree: {e}"); - return None; - } - }; - - let prompt = build_prompt(objectives, current_best, past_attempts, hint); - - info!("Invoking claude (model={model}, max_turns={max_turns})..."); - let result = Command::new("claude") - .current_dir(&worktree_dir) - .arg("-p") - .arg(&prompt) - .arg("--model") - .arg(model) - .arg("--max-turns") - .arg(max_turns.to_string()) - .arg("--verbose") - .output(); - - // Capture any diff the agent produced in the worktree - let diff = Command::new("git") - .current_dir(&worktree_dir) - .args(["diff", "HEAD"]) - .output() - .ok() - .and_then(|o| { - let s = String::from_utf8_lossy(&o.stdout).to_string(); - if s.trim().is_empty() { - None - } else { - Some(s) - } - }); - - // Apply the agent's changes to the real repo (if any) - if let Some(diff_text) = &diff { - info!("Agent produced a diff ({} bytes), applying to repo...", diff_text.len()); - let mut child = Command::new("git") - .current_dir(repo_dir) - .args(["apply", "--allow-empty"]) - .stdin(std::process::Stdio::piped()) - .spawn() - .ok(); - if let Some(ref mut c) = child { - use std::io::Write; - if let Some(stdin) = c.stdin.as_mut() { - let _ = stdin.write_all(diff_text.as_bytes()); - } - let _ = c.wait(); - } - } - - // Clean up worktree - info!("Cleaning up worktree..."); - remove_worktree(repo_dir, &worktree_dir); - let _ = std::fs::remove_dir_all(&worktree_dir); - - // Parse agent output - match result { - Ok(output) => { - let stdout = String::from_utf8_lossy(&output.stdout); - let stderr = String::from_utf8_lossy(&output.stderr); - - 
if !output.status.success() { - tracing::warn!("claude exited with status {}", output.status); - if !stderr.is_empty() { - tracing::warn!("stderr: {}", truncate(&stderr, 500)); - } - } - - let response = if stdout.trim().is_empty() { - truncate(&stderr, 2000).to_string() - } else { - truncate(&stdout, 2000).to_string() - }; - - if response.trim().is_empty() && diff.is_none() { - return None; - } - - info!("Agent response ({} chars)", response.len()); - Some(diff.unwrap_or(response)) - } - Err(e) => { - tracing::error!("Failed to invoke claude: {e}"); - tracing::error!( - "Make sure the `claude` CLI is installed and on your PATH. \ - Install via: npm install -g @anthropic-ai/claude-code" - ); - None - } - } -} - -fn build_prompt( +fn build_optimize_prompt( objectives: &[Objective], current_best: &HashMap, past_attempts: &[OptimizationAttempt], @@ -502,15 +391,3 @@ fn build_objectives( )), ] } - -fn truncate(s: &str, max_len: usize) -> &str { - if s.len() <= max_len { - s - } else { - let mut end = max_len; - while end > 0 && !s.is_char_boundary(end) { - end -= 1; - } - &s[..end] - } -} diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index c7591fa76..a90b5f362 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -1,20 +1,18 @@ -use std::process::Command; use std::sync::Arc; use clap::Parser; use tracing::info; +use jolt_eval::agent::ClaudeCodeAgent; use jolt_eval::invariant::completeness_prover::ProverCompletenessInvariant; use jolt_eval::invariant::completeness_verifier::VerifierCompletenessInvariant; use jolt_eval::invariant::determinism::DeterminismInvariant; use jolt_eval::invariant::serialization_roundtrip::SerializationRoundtripInvariant; use jolt_eval::invariant::soundness::SoundnessInvariant; -use jolt_eval::invariant::synthesis::redteam::{ - auto_redteam, create_worktree, remove_worktree, RedTeamConfig, RedTeamResult, -}; +use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; use 
jolt_eval::invariant::synthesis::SynthesisRegistry; use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; -use jolt_eval::invariant::{FailedAttempt, SynthesisTarget}; +use jolt_eval::invariant::SynthesisTarget; use jolt_eval::TestCase; #[derive(Parser)] @@ -100,26 +98,20 @@ fn main() -> eyre::Result<()> { std::process::exit(1); }; - let working_dir = std::env::current_dir()?; let config = RedTeamConfig { - invariant_name: cli.invariant.clone(), num_iterations: cli.iterations, - model: cli.model.clone(), - working_dir: working_dir.clone(), num_fuzz_per_iteration: cli.num_fuzz, }; + let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); + let repo_dir = std::env::current_dir()?; + info!( "Starting red team: invariant={}, iterations={}, model={}, fuzz_per_iter={}", cli.invariant, cli.iterations, cli.model, cli.num_fuzz ); - let model = cli.model.clone(); - let max_turns = cli.max_turns; - - let result = auto_redteam(invariant, &config, |description, failed_attempts| { - invoke_claude_agent(&working_dir, description, failed_attempts, &model, max_turns) - }); + let result = auto_redteam(invariant, &config, &agent, &repo_dir); match result { RedTeamResult::Violation { description, error } => { @@ -147,149 +139,6 @@ fn main() -> eyre::Result<()> { Ok(()) } -/// Invoke the Claude Code CLI in an isolated worktree to attempt to find -/// an invariant violation. -/// -/// Flow: -/// 1. Create a detached git worktree so the agent has a full repo copy -/// 2. Build a prompt with the invariant description + past failed attempts -/// 3. Run `claude -p --model --max-turns ` in the worktree -/// 4. Capture the agent's analysis as the approach description -/// 5. Clean up the worktree -fn invoke_claude_agent( - repo_dir: &std::path::Path, - invariant_description: &str, - failed_attempts: &[FailedAttempt], - model: &str, - max_turns: usize, -) -> Option<(String, Vec)> { - // 1. 
Create worktree - let worktree_dir = match create_worktree(repo_dir, "redteam") { - Ok(dir) => { - info!("Created worktree at {}", dir.display()); - dir - } - Err(e) => { - tracing::error!("Failed to create worktree: {e}"); - return None; - } - }; - - // 2. Build prompt - let prompt = build_prompt(invariant_description, failed_attempts); - - // 3. Run Claude - info!("Invoking claude (model={model}, max_turns={max_turns})..."); - let result = Command::new("claude") - .current_dir(&worktree_dir) - .arg("-p") - .arg(&prompt) - .arg("--model") - .arg(model) - .arg("--max-turns") - .arg(max_turns.to_string()) - .arg("--verbose") - .output(); - - // 4. Clean up worktree regardless of outcome - info!("Cleaning up worktree..."); - remove_worktree(repo_dir, &worktree_dir); - // Also remove the temp directory itself - let _ = std::fs::remove_dir_all(&worktree_dir); - - // 5. Parse result - match result { - Ok(output) => { - let stdout = String::from_utf8_lossy(&output.stdout); - let stderr = String::from_utf8_lossy(&output.stderr); - - if !output.status.success() { - tracing::warn!("claude exited with status {}", output.status); - if !stderr.is_empty() { - tracing::warn!("stderr: {}", &stderr[..stderr.len().min(500)]); - } - } - - let approach = if stdout.trim().is_empty() { - if stderr.trim().is_empty() { - return None; - } - // Sometimes useful output goes to stderr - truncate(&stderr, 2000).to_string() - } else { - truncate(&stdout, 2000).to_string() - }; - - info!( - "Agent response ({} chars): {}...", - approach.len(), - &approach[..approach.len().min(200)] - ); - - Some((approach, vec![])) - } - Err(e) => { - tracing::error!("Failed to invoke claude: {e}"); - tracing::error!( - "Make sure the `claude` CLI is installed and on your PATH. 
\ - Install via: npm install -g @anthropic-ai/claude-code" - ); - None - } - } -} - -fn build_prompt(invariant_description: &str, failed_attempts: &[FailedAttempt]) -> String { - let mut prompt = String::new(); - - prompt.push_str( - "You are a security researcher red-teaming a zkVM (Jolt). \ - Your goal is to find an input or scenario that VIOLATES the following invariant.\n\n", - ); - - prompt.push_str("## Invariant\n\n"); - prompt.push_str(invariant_description); - prompt.push_str("\n\n"); - - prompt.push_str( - "## Instructions\n\n\ - 1. Read the relevant source code in this repository to understand how the \ - invariant is enforced.\n\ - 2. Look for edge cases, off-by-one errors, missing checks, or assumptions \ - that could be violated.\n\ - 3. If you find a potential weakness, describe it clearly.\n\ - 4. Try to construct a concrete input or scenario that triggers the violation.\n\ - 5. Summarize your approach and findings.\n\n\ - Focus on finding REAL bugs, not theoretical concerns. The invariant will be \ - mechanically checked after your analysis, so only genuine violations count.\n\n", - ); - - if !failed_attempts.is_empty() { - prompt.push_str("## Previous Failed Attempts\n\n"); - prompt.push_str( - "The following approaches have already been tried and did NOT find a violation. 
\ - Try a fundamentally different approach.\n\n", - ); - for attempt in failed_attempts { - prompt.push_str(&format!( - "- **{}**: {}\n Reason for failure: {}\n", - attempt.description, attempt.approach, attempt.failure_reason - )); - } - prompt.push('\n'); - } - - prompt.push_str( - "## Output\n\n\ - End your response with a clear summary of:\n\ - - What you investigated\n\ - - What you found (if anything)\n\ - - Whether you believe the invariant holds or can be violated\n", - ); - - prompt -} - fn register_invariants( registry: &mut SynthesisRegistry, test_case: &Arc, @@ -314,16 +163,3 @@ fn register_invariants( test_case, )))); } - -fn truncate(s: &str, max_len: usize) -> &str { - if s.len() <= max_len { - s - } else { - // Find a char boundary - let mut end = max_len; - while end > 0 && !s.is_char_boundary(end) { - end -= 1; - } - &s[..end] - } -} diff --git a/jolt-eval/src/agent.rs b/jolt-eval/src/agent.rs new file mode 100644 index 000000000..1e56add8e --- /dev/null +++ b/jolt-eval/src/agent.rs @@ -0,0 +1,206 @@ +use std::fmt; +use std::path::{Path, PathBuf}; +use std::process::Command; + +/// Output from an agent invocation. +pub struct AgentResponse { + /// The agent's textual output/analysis. + pub text: String, + /// A unified diff of code changes the agent produced, if any. + pub diff: Option, +} + +/// Error during agent invocation. +#[derive(Debug, Clone)] +pub struct AgentError { + pub message: String, +} + +impl fmt::Display for AgentError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.message) + } +} + +impl std::error::Error for AgentError {} + +impl AgentError { + pub fn new(message: impl Into) -> Self { + Self { + message: message.into(), + } + } +} + +/// A coding agent that can analyze or modify a repository given a prompt. +/// +/// Implementors are responsible for their own isolation strategy (worktrees, +/// containers, API calls, etc.). 
The `repo_dir` parameter indicates the +/// repository root so the agent can set up whatever sandbox it needs. +/// +/// # Examples +/// +/// The built-in [`ClaudeCodeAgent`] creates a git worktree and invokes the +/// `claude` CLI. A multi-agent harness could fan out to several agents in +/// parallel and merge results. An API-based agent could call a remote +/// service without any local isolation. +pub trait AgentHarness: Send + Sync { + fn invoke(&self, repo_dir: &Path, prompt: &str) -> Result; +} + +/// Agent implementation that invokes the Claude Code CLI in an isolated +/// git worktree. +pub struct ClaudeCodeAgent { + pub model: String, + pub max_turns: usize, +} + +impl ClaudeCodeAgent { + pub fn new(model: impl Into, max_turns: usize) -> Self { + Self { + model: model.into(), + max_turns, + } + } +} + +impl AgentHarness for ClaudeCodeAgent { + fn invoke(&self, repo_dir: &Path, prompt: &str) -> Result { + // 1. Create worktree + let worktree_dir = create_worktree(repo_dir)?; + tracing::info!("Created worktree at {}", worktree_dir.display()); + + // 2. Run Claude + tracing::info!( + "Invoking claude (model={}, max_turns={})...", + self.model, + self.max_turns + ); + let result = Command::new("claude") + .current_dir(&worktree_dir) + .arg("-p") + .arg(prompt) + .arg("--model") + .arg(&self.model) + .arg("--max-turns") + .arg(self.max_turns.to_string()) + .arg("--verbose") + .output(); + + // 3. Capture diff before cleanup + let diff = Command::new("git") + .current_dir(&worktree_dir) + .args(["diff", "HEAD"]) + .output() + .ok() + .and_then(|o| { + let s = String::from_utf8_lossy(&o.stdout).to_string(); + if s.trim().is_empty() { + None + } else { + Some(s) + } + }); + + // 4. Clean up worktree + tracing::info!("Cleaning up worktree..."); + remove_worktree(repo_dir, &worktree_dir); + let _ = std::fs::remove_dir_all(&worktree_dir); + + // 5. 
Parse result + match result { + Ok(output) => { + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + + if !output.status.success() { + tracing::warn!("claude exited with status {}", output.status); + if !stderr.is_empty() { + tracing::warn!("stderr: {}", truncate(&stderr, 500)); + } + } + + let text = if stdout.trim().is_empty() { + stderr.to_string() + } else { + stdout.to_string() + }; + + if text.trim().is_empty() && diff.is_none() { + return Err(AgentError::new("Agent produced no output")); + } + + Ok(AgentResponse { text, diff }) + } + Err(e) => Err(AgentError::new(format!( + "Failed to invoke claude: {e}. \ + Make sure the `claude` CLI is installed and on your PATH. \ + Install via: npm install -g @anthropic-ai/claude-code" + ))), + } + } +} + +/// Create an isolated detached git worktree from `repo_dir`. +pub fn create_worktree(repo_dir: &Path) -> Result { + let tmp = tempfile::tempdir().map_err(|e| AgentError::new(format!("tempdir: {e}")))?; + let worktree_dir = tmp.path().to_path_buf(); + std::mem::forget(tmp); + + let status = Command::new("git") + .current_dir(repo_dir) + .args(["worktree", "add", "--detach"]) + .arg(&worktree_dir) + .status() + .map_err(|e| AgentError::new(format!("git worktree: {e}")))?; + + if !status.success() { + return Err(AgentError::new("git worktree add failed")); + } + + Ok(worktree_dir) +} + +/// Remove a git worktree. +pub fn remove_worktree(repo_dir: &Path, worktree_dir: &Path) { + let _ = Command::new("git") + .current_dir(repo_dir) + .args(["worktree", "remove", "--force"]) + .arg(worktree_dir) + .status(); +} + +/// Apply a unified diff to `repo_dir`. 
+pub fn apply_diff(repo_dir: &Path, diff: &str) -> Result<(), AgentError> { + let mut child = Command::new("git") + .current_dir(repo_dir) + .args(["apply", "--allow-empty"]) + .stdin(std::process::Stdio::piped()) + .spawn() + .map_err(|e| AgentError::new(format!("git apply spawn: {e}")))?; + + if let Some(stdin) = child.stdin.as_mut() { + use std::io::Write; + let _ = stdin.write_all(diff.as_bytes()); + } + + let status = child + .wait() + .map_err(|e| AgentError::new(format!("git apply wait: {e}")))?; + + if !status.success() { + return Err(AgentError::new("git apply failed")); + } + Ok(()) +} + +pub fn truncate(s: &str, max_len: usize) -> &str { + if s.len() <= max_len { + return s; + } + let mut end = max_len; + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + &s[..end] +} diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index 07a2bf9aa..f2caf46d2 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -1,8 +1,8 @@ -use std::path::{Path, PathBuf}; -use std::process::Command; +use std::path::Path; use super::super::{DynInvariant, FailedAttempt, SynthesisTarget}; use super::SynthesisRegistry; +use crate::agent::{truncate, AgentHarness}; /// Result of a red-team session. pub enum RedTeamResult { @@ -14,10 +14,7 @@ pub enum RedTeamResult { /// Configuration for an AI red-team session. pub struct RedTeamConfig { - pub invariant_name: String, pub num_iterations: usize, - pub model: String, - pub working_dir: PathBuf, /// Number of random fuzz inputs to run after each agent attempt. 
pub num_fuzz_per_iteration: usize, } @@ -25,59 +22,27 @@ pub struct RedTeamConfig { impl Default for RedTeamConfig { fn default() -> Self { Self { - invariant_name: String::new(), num_iterations: 10, - model: "claude-sonnet-4-20250514".to_string(), - working_dir: PathBuf::from("."), num_fuzz_per_iteration: 100, } } } -/// Create an isolated git worktree for the AI agent to work in. -pub fn create_worktree(repo_dir: &Path, _branch_name: &str) -> Result { - let tmp = tempfile::tempdir().map_err(|e| format!("Failed to create temp dir: {e}"))?; - // Persist the temp dir so the worktree outlives this function - let worktree_dir = tmp.path().to_path_buf(); - std::mem::forget(tmp); - - let status = Command::new("git") - .current_dir(repo_dir) - .args(["worktree", "add", "--detach"]) - .arg(&worktree_dir) - .status() - .map_err(|e| format!("Failed to run git worktree: {e}"))?; - - if !status.success() { - return Err("git worktree add failed".to_string()); - } - - Ok(worktree_dir) -} - -/// Remove a git worktree. -pub fn remove_worktree(repo_dir: &Path, worktree_dir: &Path) { - let _ = Command::new("git") - .current_dir(repo_dir) - .args(["worktree", "remove", "--force"]) - .arg(worktree_dir) - .status(); -} - /// Run an AI red-team session against a single invariant. /// -/// The AI agent runs in an isolated worktree to produce a claimed bad input. -/// The invariant is checked in the original working tree so the AI cannot cheat. +/// Each iteration: +/// 1. Builds a prompt from the invariant description + past failed attempts +/// 2. Invokes the agent (via the [`AgentHarness`] trait) to analyze the code +/// 3. Runs the invariant's seed corpus + random fuzz inputs +/// 4. If a violation is found, returns immediately +/// 5. Otherwise records the failed attempt and continues /// -/// This function orchestrates the loop but delegates the actual AI interaction -/// to the `invoke_agent` callback, which should: -/// 1. 
Receive the invariant description and past failed attempts -/// 2. Have the AI produce a candidate counterexample (as bytes) -/// 3. Return the candidate or None if the AI couldn't produce one +/// The `agent` is responsible for its own isolation (e.g. worktrees). pub fn auto_redteam( invariant: &dyn DynInvariant, config: &RedTeamConfig, - mut invoke_agent: impl FnMut(&str, &[FailedAttempt]) -> Option<(String, Vec)>, + agent: &dyn AgentHarness, + repo_dir: &Path, ) -> RedTeamResult { let description = invariant.description(); let mut failed_attempts = Vec::new(); @@ -90,12 +55,18 @@ pub fn auto_redteam( invariant.name() ); - let result = invoke_agent(&description, &failed_attempts); + let prompt = build_redteam_prompt(&description, &failed_attempts); + + match agent.invoke(repo_dir, &prompt) { + Ok(response) => { + let approach = truncate(&response.text, 2000).to_string(); + tracing::info!( + "Agent response ({} chars): {}...", + approach.len(), + truncate(&approach, 200) + ); - match result { - Some((approach, _candidate_bytes)) => { - // Run the invariant's seed corpus + random fuzz inputs to see - // if the agent's analysis revealed a real violation. 
+ // Run the invariant's seed corpus + random fuzz inputs let check_results = invariant.run_checks(config.num_fuzz_per_iteration); let violation = check_results.iter().find(|r| r.is_err()); @@ -112,11 +83,12 @@ pub fn auto_redteam( failure_reason: "Invariant check passed for all inputs".to_string(), }); } - None => { + Err(e) => { + tracing::warn!("Agent invocation failed: {e}"); failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), - approach: "Agent could not produce a candidate".to_string(), - failure_reason: "No candidate generated".to_string(), + approach: "Agent invocation failed".to_string(), + failure_reason: e.to_string(), }); } } @@ -127,6 +99,60 @@ pub fn auto_redteam( } } +fn build_redteam_prompt( + invariant_description: &str, + failed_attempts: &[FailedAttempt], +) -> String { + let mut prompt = String::new(); + + prompt.push_str( + "You are a security researcher red-teaming a zkVM (Jolt). \ + Your goal is to find an input or scenario that VIOLATES the following invariant.\n\n", + ); + + prompt.push_str("## Invariant\n\n"); + prompt.push_str(invariant_description); + prompt.push_str("\n\n"); + + prompt.push_str( + "## Instructions\n\n\ + 1. Read the relevant source code in this repository to understand how the \ + invariant is enforced.\n\ + 2. Look for edge cases, off-by-one errors, missing checks, or assumptions \ + that could be violated.\n\ + 3. If you find a potential weakness, describe it clearly.\n\ + 4. Try to construct a concrete input or scenario that triggers the violation.\n\ + 5. Summarize your approach and findings.\n\n\ + Focus on finding REAL bugs, not theoretical concerns. The invariant will be \ + mechanically checked after your analysis, so only genuine violations count.\n\n", + ); + + if !failed_attempts.is_empty() { + prompt.push_str("## Previous Failed Attempts\n\n"); + prompt.push_str( + "The following approaches have already been tried and did NOT find a violation. 
\ + Try a fundamentally different approach.\n\n", + ); + for attempt in failed_attempts { + prompt.push_str(&format!( + "- **{}**: {}\n Reason for failure: {}\n", + attempt.description, attempt.approach, attempt.failure_reason + )); + } + prompt.push('\n'); + } + + prompt.push_str( + "## Output\n\n\ + End your response with a clear summary of:\n\ + - What you investigated\n\ + - What you found (if anything)\n\ + - Whether you believe the invariant holds or can be violated\n", + ); + + prompt +} + /// List all invariants suitable for red-team testing. pub fn redteamable_invariants(registry: &SynthesisRegistry) -> Vec<&dyn DynInvariant> { registry.for_target(SynthesisTarget::RedTeam) diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index 4417a15cb..27e982bcf 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -1,5 +1,6 @@ #![allow(non_snake_case)] +pub mod agent; pub mod invariant; pub mod objective; From 60fd789b2f98c258a19d7fbab745311ad3151d29 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Tue, 31 Mar 2026 19:32:32 -0400 Subject: [PATCH 07/86] test(jolt-eval): add MockAgent and comprehensive agent harness tests Adds `MockAgent` to the library with three constructors: - `always_ok(text)` -- repeats a fixed success response - `always_err(msg)` -- repeats a fixed error - `from_responses(vec)` -- drains a queue, then repeats the last entry MockAgent records every prompt it receives, accessible via `recorded_prompts()`. 
21 new tests covering: - MockAgent basics: ok, err, prompt recording, repeat behavior, response queuing, diff passthrough - auto_redteam integration: no-violation path, immediate violation, fuzz-triggered violation, agent error handling, zero iterations, mixed ok/err responses - Prompt construction: invariant description inclusion, failed attempt accumulation across iterations - Trait extensibility: object safety, Arc wrapping, custom `FirstSuccessHarness` multi-agent implementation plugging directly into auto_redteam Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/agent.rs | 87 ++++++ jolt-eval/tests/agent_test.rs | 484 ++++++++++++++++++++++++++++++++++ 2 files changed, 571 insertions(+) create mode 100644 jolt-eval/tests/agent_test.rs diff --git a/jolt-eval/src/agent.rs b/jolt-eval/src/agent.rs index 1e56add8e..f2b563498 100644 --- a/jolt-eval/src/agent.rs +++ b/jolt-eval/src/agent.rs @@ -3,6 +3,7 @@ use std::path::{Path, PathBuf}; use std::process::Command; /// Output from an agent invocation. +#[derive(Debug)] pub struct AgentResponse { /// The agent's textual output/analysis. pub text: String, @@ -194,6 +195,92 @@ pub fn apply_diff(repo_dir: &Path, diff: &str) -> Result<(), AgentError> { Ok(()) } +/// A mock agent for testing. Returns pre-configured responses and records +/// every prompt it receives. 
+/// +/// # Usage +/// +/// ```ignore +/// use jolt_eval::agent::{MockAgent, AgentResponse, AgentError}; +/// +/// // Agent that always succeeds with a fixed response +/// let agent = MockAgent::always_ok("I found nothing."); +/// +/// // Agent that returns a sequence of responses +/// let agent = MockAgent::from_responses(vec![ +/// Ok(AgentResponse { text: "attempt 1".into(), diff: None }), +/// Err(AgentError::new("network timeout")), +/// Ok(AgentResponse { text: "attempt 3".into(), diff: Some("diff".into()) }), +/// ]); +/// +/// // After invoking, inspect the prompts the agent received +/// let prompts = agent.recorded_prompts(); +/// ``` +pub struct MockAgent { + responses: std::sync::Mutex>>, + prompts: std::sync::Mutex>, +} + +impl MockAgent { + /// Create a mock that always returns `Ok` with the given text and no diff. + pub fn always_ok(text: &str) -> Self { + let text = text.to_string(); + Self { + responses: std::sync::Mutex::new(vec![Ok(AgentResponse { + text, + diff: None, + })]), + prompts: std::sync::Mutex::new(Vec::new()), + } + } + + /// Create a mock that always returns `Err`. + pub fn always_err(message: &str) -> Self { + Self { + responses: std::sync::Mutex::new(vec![Err(AgentError::new(message))]), + prompts: std::sync::Mutex::new(Vec::new()), + } + } + + /// Create a mock that returns responses from a queue, in order. + /// Once only the final response is left it is repeated forever; an empty queue always errors. + pub fn from_responses(responses: Vec>) -> Self { + let mut reversed = responses; + reversed.reverse(); // so we can pop from the back + Self { + responses: std::sync::Mutex::new(reversed), + prompts: std::sync::Mutex::new(Vec::new()), + } + } + + /// Return all prompts that were passed to `invoke`, in order. 
+ pub fn recorded_prompts(&self) -> Vec { + self.prompts.lock().unwrap().clone() + } +} + +impl AgentHarness for MockAgent { + fn invoke(&self, _repo_dir: &Path, prompt: &str) -> Result { + self.prompts.lock().unwrap().push(prompt.to_string()); + + let mut responses = self.responses.lock().unwrap(); + if responses.is_empty() { + return Err(AgentError::new("MockAgent: no more responses")); + } + // If only one response left, clone it (repeating) instead of popping + if responses.len() == 1 { + return match &responses[0] { + Ok(r) => Ok(AgentResponse { + text: r.text.clone(), + diff: r.diff.clone(), + }), + Err(e) => Err(AgentError::new(&e.message)), + }; + } + responses.pop().unwrap() + } +} + pub fn truncate(s: &str, max_len: usize) -> &str { if s.len() <= max_len { return s; diff --git a/jolt-eval/tests/agent_test.rs b/jolt-eval/tests/agent_test.rs new file mode 100644 index 000000000..b4b03dcac --- /dev/null +++ b/jolt-eval/tests/agent_test.rs @@ -0,0 +1,484 @@ +use std::path::Path; + +use enumset::EnumSet; +use jolt_eval::agent::{AgentError, AgentHarness, AgentResponse, MockAgent}; +use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; +use jolt_eval::invariant::{Invariant, InvariantViolation, SynthesisTarget}; + +// ========================================================================= +// Test invariants +// ========================================================================= + +/// Always passes -- the red-team loop should never find a violation. 
+struct AlwaysPassInvariant; +impl Invariant for AlwaysPassInvariant { + type Setup = (); + type Input = u8; + fn name(&self) -> &str { + "always_pass" + } + fn description(&self) -> String { + "This invariant always passes.".into() + } + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::RedTeam + } + fn setup(&self) {} + fn check(&self, _: &(), _: u8) -> Result<(), InvariantViolation> { + Ok(()) + } + fn seed_corpus(&self) -> Vec { + vec![0, 1, 255] + } +} + +/// Always fails -- the red-team loop should find a violation immediately. +struct AlwaysFailInvariant; +impl Invariant for AlwaysFailInvariant { + type Setup = (); + type Input = u8; + fn name(&self) -> &str { + "always_fail" + } + fn description(&self) -> String { + "This invariant always fails.".into() + } + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::RedTeam + } + fn setup(&self) {} + fn check(&self, _: &(), input: u8) -> Result<(), InvariantViolation> { + Err(InvariantViolation::new(format!("always fails ({input})"))) + } + fn seed_corpus(&self) -> Vec { + vec![42] + } +} + +/// Fails only when the input is 0 -- tests that fuzz inputs can trigger it. 
+struct FailsOnZeroInvariant; +impl Invariant for FailsOnZeroInvariant { + type Setup = (); + type Input = u8; + fn name(&self) -> &str { + "fails_on_zero" + } + fn description(&self) -> String { + "Fails when input is 0.".into() + } + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::RedTeam + } + fn setup(&self) {} + fn check(&self, _: &(), input: u8) -> Result<(), InvariantViolation> { + if input == 0 { + Err(InvariantViolation::new("input was zero")) + } else { + Ok(()) + } + } + fn seed_corpus(&self) -> Vec { + vec![1, 2, 3] // seed corpus avoids 0 + } +} + +// ========================================================================= +// MockAgent tests +// ========================================================================= + +#[test] +fn mock_always_ok_returns_text() { + let agent = MockAgent::always_ok("hello world"); + let resp = agent.invoke(Path::new("/tmp"), "test prompt").unwrap(); + assert_eq!(resp.text, "hello world"); + assert!(resp.diff.is_none()); +} + +#[test] +fn mock_always_err_returns_error() { + let agent = MockAgent::always_err("boom"); + let err = agent.invoke(Path::new("/tmp"), "test").unwrap_err(); + assert_eq!(err.message, "boom"); +} + +#[test] +fn mock_records_prompts() { + let agent = MockAgent::always_ok("ok"); + agent.invoke(Path::new("/tmp"), "prompt 1").unwrap(); + agent.invoke(Path::new("/tmp"), "prompt 2").unwrap(); + agent.invoke(Path::new("/tmp"), "prompt 3").unwrap(); + + let prompts = agent.recorded_prompts(); + assert_eq!(prompts.len(), 3); + assert_eq!(prompts[0], "prompt 1"); + assert_eq!(prompts[1], "prompt 2"); + assert_eq!(prompts[2], "prompt 3"); +} + +#[test] +fn mock_always_ok_repeats_indefinitely() { + let agent = MockAgent::always_ok("same"); + for _ in 0..100 { + let resp = agent.invoke(Path::new("/tmp"), "x").unwrap(); + assert_eq!(resp.text, "same"); + } +} + +#[test] +fn mock_always_err_repeats_indefinitely() { + let agent = MockAgent::always_err("fail"); + for _ in 0..100 { + let 
err = agent.invoke(Path::new("/tmp"), "x").unwrap_err(); + assert_eq!(err.message, "fail"); + } +} + +#[test] +fn mock_from_responses_returns_in_order() { + let agent = MockAgent::from_responses(vec![ + Ok(AgentResponse { + text: "first".into(), + diff: None, + }), + Ok(AgentResponse { + text: "second".into(), + diff: Some("diff".into()), + }), + Err(AgentError::new("third fails")), + ]); + + let r1 = agent.invoke(Path::new("/tmp"), "a").unwrap(); + assert_eq!(r1.text, "first"); + assert!(r1.diff.is_none()); + + let r2 = agent.invoke(Path::new("/tmp"), "b").unwrap(); + assert_eq!(r2.text, "second"); + assert_eq!(r2.diff.as_deref(), Some("diff")); + + let r3 = agent.invoke(Path::new("/tmp"), "c").unwrap_err(); + assert_eq!(r3.message, "third fails"); +} + +#[test] +fn mock_from_responses_last_entry_repeats() { + let agent = MockAgent::from_responses(vec![ + Ok(AgentResponse { + text: "first".into(), + diff: None, + }), + Ok(AgentResponse { + text: "last".into(), + diff: None, + }), + ]); + + agent.invoke(Path::new("/tmp"), "a").unwrap(); + let r2 = agent.invoke(Path::new("/tmp"), "b").unwrap(); + assert_eq!(r2.text, "last"); + // After exhausting queue, last response repeats + let r3 = agent.invoke(Path::new("/tmp"), "c").unwrap(); + assert_eq!(r3.text, "last"); +} + +#[test] +fn mock_with_diff() { + let agent = MockAgent::from_responses(vec![Ok(AgentResponse { + text: "I optimized the code".into(), + diff: Some("--- a/foo\n+++ b/foo\n@@ ...\n-old\n+new".into()), + })]); + + let resp = agent.invoke(Path::new("/tmp"), "optimize").unwrap(); + assert!(resp.diff.is_some()); + assert!(resp.diff.unwrap().contains("+new")); +} + +// ========================================================================= +// auto_redteam tests with MockAgent +// ========================================================================= + +#[test] +fn redteam_no_violation_when_invariant_always_passes() { + let invariant = AlwaysPassInvariant; + let agent = MockAgent::always_ok("I analyzed 
the code and found nothing."); + let config = RedTeamConfig { + num_iterations: 3, + num_fuzz_per_iteration: 5, + }; + + let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + match result { + RedTeamResult::NoViolation { attempts } => { + assert_eq!(attempts.len(), 3); + for a in &attempts { + assert_eq!(a.failure_reason, "Invariant check passed for all inputs"); + } + } + RedTeamResult::Violation { .. } => { + panic!("Expected no violation for AlwaysPassInvariant"); + } + } + + // Agent should have been invoked exactly 3 times + assert_eq!(agent.recorded_prompts().len(), 3); +} + +#[test] +fn redteam_finds_violation_immediately_when_invariant_always_fails() { + let invariant = AlwaysFailInvariant; + let agent = MockAgent::always_ok("Trying something."); + let config = RedTeamConfig { + num_iterations: 10, + num_fuzz_per_iteration: 0, // seed corpus alone triggers failure + }; + + let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + match result { + RedTeamResult::Violation { error, .. } => { + assert!(error.contains("always fails")); + } + RedTeamResult::NoViolation { .. } => { + panic!("Expected violation for AlwaysFailInvariant"); + } + } + + // Should stop after first iteration (found violation) + assert_eq!(agent.recorded_prompts().len(), 1); +} + +#[test] +fn redteam_finds_violation_via_fuzz_inputs() { + let invariant = FailsOnZeroInvariant; + let agent = MockAgent::always_ok("Analyzing..."); + let config = RedTeamConfig { + num_iterations: 3, + // High fuzz count makes it very likely a 0 byte appears + num_fuzz_per_iteration: 1000, + }; + + let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + // With 1000 random u8 inputs per iteration, the chance of never hitting 0 + // across 3 iterations is (255/256)^3000 ≈ 0. So we expect a violation. + match result { + RedTeamResult::Violation { error, .. } => { + assert!(error.contains("zero")); + } + RedTeamResult::NoViolation { .. 
} => { + panic!("Expected violation for FailsOnZeroInvariant with high fuzz count"); + } + } +} + +#[test] +fn redteam_handles_agent_errors_gracefully() { + let invariant = AlwaysPassInvariant; + let agent = MockAgent::always_err("network timeout"); + let config = RedTeamConfig { + num_iterations: 3, + num_fuzz_per_iteration: 0, + }; + + let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + match result { + RedTeamResult::NoViolation { attempts } => { + assert_eq!(attempts.len(), 3); + for a in &attempts { + assert_eq!(a.approach, "Agent invocation failed"); + assert!(a.failure_reason.contains("network timeout")); + } + } + RedTeamResult::Violation { .. } => { + panic!("Expected no violation when agent always errors"); + } + } +} + +#[test] +fn redteam_prompt_includes_invariant_description() { + let invariant = AlwaysPassInvariant; + let agent = MockAgent::always_ok("ok"); + let config = RedTeamConfig { + num_iterations: 1, + num_fuzz_per_iteration: 0, + }; + + auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + let prompts = agent.recorded_prompts(); + assert_eq!(prompts.len(), 1); + assert!(prompts[0].contains("This invariant always passes.")); + assert!(prompts[0].contains("VIOLATES")); +} + +#[test] +fn redteam_prompt_includes_failed_attempts_after_first_iteration() { + let invariant = AlwaysPassInvariant; + let agent = MockAgent::always_ok("I tried X but it didn't work."); + let config = RedTeamConfig { + num_iterations: 3, + num_fuzz_per_iteration: 0, + }; + + auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + let prompts = agent.recorded_prompts(); + assert_eq!(prompts.len(), 3); + + // First prompt should NOT contain "Previous Failed Attempts" + assert!(!prompts[0].contains("Previous Failed Attempts")); + + // Second prompt should contain the first attempt's approach + assert!(prompts[1].contains("Previous Failed Attempts")); + assert!(prompts[1].contains("I tried X but it didn't work.")); + + // Third 
prompt should contain both prior attempts + assert!(prompts[2].contains("Iteration 1")); + assert!(prompts[2].contains("Iteration 2")); +} + +#[test] +fn redteam_zero_iterations_returns_immediately() { + let invariant = AlwaysPassInvariant; + let agent = MockAgent::always_ok("should not be called"); + let config = RedTeamConfig { + num_iterations: 0, + num_fuzz_per_iteration: 0, + }; + + let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + match result { + RedTeamResult::NoViolation { attempts } => { + assert!(attempts.is_empty()); + } + _ => panic!("Expected NoViolation with empty attempts"), + } + + assert!(agent.recorded_prompts().is_empty()); +} + +#[test] +fn redteam_mixed_agent_responses() { + let invariant = AlwaysPassInvariant; + let agent = MockAgent::from_responses(vec![ + Ok(AgentResponse { + text: "first try".into(), + diff: None, + }), + Err(AgentError::new("transient error")), + Ok(AgentResponse { + text: "third try".into(), + diff: None, + }), + ]); + let config = RedTeamConfig { + num_iterations: 3, + num_fuzz_per_iteration: 0, + }; + + let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + match result { + RedTeamResult::NoViolation { attempts } => { + assert_eq!(attempts.len(), 3); + assert!(attempts[0].approach.contains("first try")); + assert_eq!(attempts[1].approach, "Agent invocation failed"); + assert!(attempts[2].approach.contains("third try")); + } + _ => panic!("Expected NoViolation"), + } +} + +// ========================================================================= +// AgentHarness trait object tests +// ========================================================================= + +#[test] +fn agent_harness_is_object_safe() { + // Verify we can use AgentHarness as a trait object + let agent: Box = Box::new(MockAgent::always_ok("hi")); + let resp = agent.invoke(Path::new("/tmp"), "hello").unwrap(); + assert_eq!(resp.text, "hi"); +} + +#[test] +fn agent_harness_works_with_arc() { + use 
std::sync::Arc; + let agent: Arc = Arc::new(MockAgent::always_ok("shared")); + let resp = agent.invoke(Path::new("/tmp"), "test").unwrap(); + assert_eq!(resp.text, "shared"); +} + +/// A custom multi-agent harness that fans out to N agents and returns the +/// first successful response. Demonstrates the trait's extensibility. +struct FirstSuccessHarness { + agents: Vec>, +} + +impl AgentHarness for FirstSuccessHarness { + fn invoke(&self, repo_dir: &Path, prompt: &str) -> Result { + for agent in &self.agents { + if let Ok(resp) = agent.invoke(repo_dir, prompt) { + return Ok(resp); + } + } + Err(AgentError::new("All agents failed")) + } +} + +#[test] +fn custom_multi_agent_harness() { + let harness = FirstSuccessHarness { + agents: vec![ + Box::new(MockAgent::always_err("agent 1 down")), + Box::new(MockAgent::always_err("agent 2 down")), + Box::new(MockAgent::always_ok("agent 3 succeeded")), + ], + }; + + let resp = harness.invoke(Path::new("/tmp"), "test").unwrap(); + assert_eq!(resp.text, "agent 3 succeeded"); +} + +#[test] +fn custom_multi_agent_all_fail() { + let harness = FirstSuccessHarness { + agents: vec![ + Box::new(MockAgent::always_err("nope")), + Box::new(MockAgent::always_err("nope")), + ], + }; + + let err = harness.invoke(Path::new("/tmp"), "test").unwrap_err(); + assert_eq!(err.message, "All agents failed"); +} + +#[test] +fn custom_harness_plugs_into_auto_redteam() { + let harness = FirstSuccessHarness { + agents: vec![ + Box::new(MockAgent::always_err("agent 1 down")), + Box::new(MockAgent::always_ok("agent 2 found nothing")), + ], + }; + + let invariant = AlwaysPassInvariant; + let config = RedTeamConfig { + num_iterations: 2, + num_fuzz_per_iteration: 0, + }; + + let result = auto_redteam(&invariant, &config, &harness, Path::new("/tmp")); + + match result { + RedTeamResult::NoViolation { attempts } => { + assert_eq!(attempts.len(), 2); + // The harness should have used agent 2's response + assert!(attempts[0].approach.contains("agent 2 found 
nothing")); + } + _ => panic!("Expected NoViolation"), + } +} From 99fa7cf2610a272eb7788cdc16b5b1f790d05f9a Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Tue, 31 Mar 2026 19:39:57 -0400 Subject: [PATCH 08/86] test(jolt-eval): add auto_optimize library function and mock-based tests Extracts the optimization loop from bin/optimize.rs into a testable library function `objective::optimize::auto_optimize` that takes: - `&dyn AgentHarness` for the AI agent - `&mut dyn OptimizeEnv` for side effects (measure, check invariants, apply diff, accept/reject) `OptimizeEnv` trait lets tests swap in a `MockOptimizeEnv` with controllable measurement sequences, invariant pass/fail schedules, and recorded side effects. 14 new optimization tests covering: - Accept on improvement (Minimize and Maximize directions) - Reject on regression - Reject when invariants fail despite improvement - Multi-iteration progressive improvement with mixed accept/reject - Early stop when agent produces no diff or errors - Zero iterations - Multiple objectives (any-improves-triggers-accept) - Prompt content: measurements, hint, past attempts - Diff application recording - Invariant failure mid-sequence Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/optimize.rs | 344 ++++++------------- jolt-eval/src/objective/mod.rs | 1 + jolt-eval/src/objective/optimize.rs | 219 ++++++++++++ jolt-eval/tests/agent_test.rs | 494 ++++++++++++++++++++++++++++ 4 files changed, 812 insertions(+), 246 deletions(-) create mode 100644 jolt-eval/src/objective/optimize.rs diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 942d84b67..84d305cc9 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -3,9 +3,8 @@ use std::process::Command; use std::sync::Arc; use clap::Parser; -use tracing::info; -use jolt_eval::agent::{apply_diff, truncate, AgentHarness, ClaudeCodeAgent}; +use jolt_eval::agent::ClaudeCodeAgent; use 
jolt_eval::invariant::completeness_prover::ProverCompletenessInvariant; use jolt_eval::invariant::completeness_verifier::VerifierCompletenessInvariant; use jolt_eval::invariant::determinism::DeterminismInvariant; @@ -15,12 +14,13 @@ use jolt_eval::invariant::synthesis::SynthesisRegistry; use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; use jolt_eval::objective::guest_cycles::GuestCycleCountObjective; use jolt_eval::objective::inline_lengths::InlineLengthsObjective; +use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; use jolt_eval::objective::peak_rss::PeakRssObjective; use jolt_eval::objective::proof_size::ProofSizeObjective; use jolt_eval::objective::prover_time::ProverTimeObjective; use jolt_eval::objective::verifier_time::VerifierTimeObjective; use jolt_eval::objective::wrapping_cost::WrappingCostObjective; -use jolt_eval::objective::{measure_objectives, Objective, OptimizationAttempt}; +use jolt_eval::objective::{measure_objectives, Direction, Objective}; use jolt_eval::TestCase; #[derive(Parser)] @@ -28,8 +28,6 @@ use jolt_eval::TestCase; #[command(about = "AI-driven optimization of Jolt objectives")] struct Cli { /// Objectives to optimize (comma-separated). Default: all. - /// Available: peak_rss, prover_time, proof_size, verifier_time, - /// guest_cycle_count, inline_lengths, wrapping_cost #[arg(long)] objectives: Option, @@ -58,6 +56,60 @@ struct Cli { hint: Option, } +/// Real environment backed by Jolt objectives, invariants, and git. 
+struct RealEnv { + objectives: Vec, + registry: SynthesisRegistry, + repo_dir: std::path::PathBuf, +} + +impl OptimizeEnv for RealEnv { + fn measure(&mut self) -> HashMap { + measure_objectives(&self.objectives) + } + + fn check_invariants(&mut self) -> bool { + self.registry.invariants().iter().all(|inv| { + let results = inv.run_checks(0); + results.iter().all(|r| r.is_ok()) + }) + } + + fn directions(&self) -> HashMap { + self.objectives + .iter() + .map(|o| (o.name().to_string(), o.direction())) + .collect() + } + + fn apply_diff(&mut self, diff: &str) { + if let Err(e) = jolt_eval::agent::apply_diff(&self.repo_dir, diff) { + tracing::warn!("Failed to apply diff: {e}"); + } + } + + fn accept(&mut self, iteration: usize) { + println!(" Improvement found -- keeping changes."); + let _ = Command::new("git") + .current_dir(&self.repo_dir) + .args(["add", "-A"]) + .status(); + let msg = format!("perf(auto-optimize): iteration {iteration}"); + let _ = Command::new("git") + .current_dir(&self.repo_dir) + .args(["commit", "-m", &msg, "--allow-empty"]) + .status(); + } + + fn reject(&mut self) { + println!(" Reverting changes."); + let _ = Command::new("git") + .current_dir(&self.repo_dir) + .args(["checkout", "."]) + .status(); + } +} + fn main() -> eyre::Result<()> { tracing_subscriber::fmt::init(); let cli = Cli::parse(); @@ -98,8 +150,7 @@ fn main() -> eyre::Result<()> { .collect(); if objectives.is_empty() { - eprintln!("No matching objectives found."); - eprintln!("Available: peak_rss, prover_time, proof_size, verifier_time, guest_cycle_count, inline_lengths, wrapping_cost"); + eprintln!("No matching objectives. 
Available: peak_rss, prover_time, proof_size, verifier_time, guest_cycle_count, inline_lengths, wrapping_cost"); std::process::exit(1); } @@ -107,223 +158,59 @@ fn main() -> eyre::Result<()> { let mut registry = SynthesisRegistry::new(); register_invariants(&mut registry, &test_case, &default_inputs); - let baseline = measure_objectives(&objectives); - println!("=== Baseline measurements ==="); - print_measurements(&objectives, &baseline); - println!(); - - let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); let repo_dir = std::env::current_dir()?; - let mut attempts: Vec = Vec::new(); - let mut best = baseline.clone(); - - for iteration in 0..cli.iterations { - println!("=== Iteration {}/{} ===", iteration + 1, cli.iterations); - - let prompt = - build_optimize_prompt(&objectives, &best, &attempts, cli.hint.as_deref()); - - let response = match agent.invoke(&repo_dir, &prompt) { - Ok(r) => r, - Err(e) => { - info!("Agent error: {e}"); - break; - } - }; - - // Apply the agent's diff to the real repo - if let Some(diff) = &response.diff { - info!("Agent produced a diff ({} bytes), applying...", diff.len()); - if let Err(e) = apply_diff(&repo_dir, diff) { - tracing::warn!("Failed to apply diff: {e}"); - } - } else { - info!("Agent produced no code changes, stopping."); - break; - } - - let new_measurements = measure_objectives(&objectives); - println!(" Measurements after changes:"); - print_measurements(&objectives, &new_measurements); - - let invariants_passed = registry.invariants().iter().all(|inv| { - let results = inv.run_checks(0); - results.iter().all(|r| r.is_ok()) - }); - - if !invariants_passed { - println!(" Invariants FAILED -- reverting."); - revert_changes(&repo_dir); - } + let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); + let config = OptimizeConfig { + num_iterations: cli.iterations, + hint: cli.hint.clone(), + }; - let improved = if invariants_passed { - objective_names.iter().any(|name| { - let old = best.get(name); - let new 
= new_measurements.get(name); - match (old, new) { - (Some(&o), Some(&n)) => { - let obj = objectives.iter().find(|obj| obj.name() == name); - match obj.map(|o| o.direction()) { - Some(jolt_eval::Direction::Minimize) => n < o, - Some(jolt_eval::Direction::Maximize) => n > o, - None => false, - } - } - _ => false, - } - }) - } else { - false - }; + let mut env = RealEnv { + objectives, + registry, + repo_dir, + }; - let diff_text = response.diff.as_deref().unwrap_or(""); - let attempt = OptimizationAttempt { - description: format!("iteration {}", iteration + 1), - diff: truncate(diff_text, 5000).to_string(), - measurements: new_measurements.clone(), - invariants_passed, - }; - attempts.push(attempt); - - if improved { - println!(" Improvement found -- keeping changes."); - best = new_measurements; - commit_changes(&repo_dir, iteration + 1); - } else if invariants_passed { - println!(" No improvement -- reverting."); - revert_changes(&repo_dir); - } + println!("=== Baseline measurements ==="); + let baseline = env.measure(); + print_measurements(&env.directions(), &baseline); + println!(); - println!(); - } + let result = auto_optimize(&agent, &mut env, &config, &std::env::current_dir()?); println!("=== Optimization summary ==="); println!( "{}/{} iterations produced improvements.", - attempts + result + .attempts .iter() .filter(|a| a.invariants_passed && a.measurements .iter() - .any(|(name, &val)| { baseline.get(name).is_some_and(|&b| val != b) })) + .any(|(name, &val)| { result.baseline.get(name).is_some_and(|&b| val != b) })) .count(), - attempts.len() + result.attempts.len() ); println!(); println!("Final measurements:"); - print_measurements(&objectives, &best); + print_measurements(&env.directions(), &result.best); Ok(()) } -fn build_optimize_prompt( - objectives: &[Objective], - current_best: &HashMap, - past_attempts: &[OptimizationAttempt], - hint: Option<&str>, -) -> String { - let mut prompt = String::new(); - - prompt.push_str( - "You are an expert 
performance engineer optimizing a zkVM (Jolt). \ - Your goal is to make code changes that improve the following objectives.\n\n", - ); - - prompt.push_str("## Objectives to optimize\n\n"); - for obj in objectives { - let dir = match obj.direction() { - jolt_eval::Direction::Minimize => "lower is better", - jolt_eval::Direction::Maximize => "higher is better", - }; - let current = current_best - .get(obj.name()) - .map(|v| format!("{v:.4}")) - .unwrap_or_else(|| "unknown".to_string()); - prompt.push_str(&format!( - "- **{}**: current = {}, direction = {}\n", - obj.name(), - current, - dir, - )); - } - prompt.push('\n'); - - prompt.push_str( - "## Instructions\n\n\ - 1. Read the relevant source code (especially `jolt-core/src/`) to understand \ - hot paths and potential optimization opportunities.\n\ - 2. Make targeted code changes that you believe will improve the objectives.\n\ - 3. Focus on changes to `jolt-core/` -- do NOT modify `jolt-eval/`.\n\ - 4. Prefer changes that are safe, correct, and unlikely to break invariants.\n\ - 5. Run `cargo clippy -p jolt-core --features host --message-format=short -q` \ - to verify your changes compile.\n\ - 6. Summarize what you changed and why you expect it to improve the objectives.\n\n", - ); - - if let Some(h) = hint { - prompt.push_str("## Hint\n\n"); - prompt.push_str(h); - prompt.push_str("\n\n"); - } - - if !past_attempts.is_empty() { - prompt.push_str("## Previous attempts\n\n"); - for attempt in past_attempts { - let status = if attempt.invariants_passed { - "invariants passed" - } else { - "INVARIANTS FAILED" - }; - prompt.push_str(&format!("- **{}** ({}): ", attempt.description, status)); - for (name, val) in &attempt.measurements { - prompt.push_str(&format!("{name}={val:.4} ")); - } - prompt.push('\n'); - } - prompt.push('\n'); - } - - prompt.push_str( - "## Output\n\n\ - Make your code changes directly. 
After you're done, summarize:\n\ - - What you changed\n\ - - Why you expect improvement\n\ - - Any risks or trade-offs\n", - ); - - prompt -} - -fn revert_changes(repo_dir: &std::path::Path) { - let _ = Command::new("git") - .current_dir(repo_dir) - .args(["checkout", "."]) - .status(); -} - -fn commit_changes(repo_dir: &std::path::Path, iteration: usize) { - let _ = Command::new("git") - .current_dir(repo_dir) - .args(["add", "-A"]) - .status(); - let msg = format!("perf(auto-optimize): iteration {iteration}"); - let _ = Command::new("git") - .current_dir(repo_dir) - .args(["commit", "-m", &msg, "--allow-empty"]) - .status(); -} - -fn print_measurements(objectives: &[Objective], measurements: &HashMap) { - for obj in objectives { +fn print_measurements(directions: &HashMap, measurements: &HashMap) { + let mut names: Vec<_> = directions.keys().collect(); + names.sort(); + for name in names { let val = measurements - .get(obj.name()) + .get(name) .map(|v| format!("{v:.4}")) .unwrap_or_else(|| "N/A".to_string()); - let dir = match obj.direction() { - jolt_eval::Direction::Minimize => "min", - jolt_eval::Direction::Maximize => "max", + let dir = match directions[name] { + Direction::Minimize => "min", + Direction::Maximize => "max", }; - println!(" {:<25} {:>15} {:>6}", obj.name(), val, dir); + println!(" {:<25} {:>15} {:>6}", name, val, dir); } } @@ -332,24 +219,12 @@ fn register_invariants( test_case: &Arc, default_inputs: &[u8], ) { - registry.register(Box::new(SoundnessInvariant::new( - Arc::clone(test_case), - default_inputs.to_vec(), - ))); - registry.register(Box::new(VerifierCompletenessInvariant::new(Arc::clone( - test_case, - )))); - registry.register(Box::new(ProverCompletenessInvariant::new(Arc::clone( - test_case, - )))); + registry.register(Box::new(SoundnessInvariant::new(Arc::clone(test_case), default_inputs.to_vec()))); + registry.register(Box::new(VerifierCompletenessInvariant::new(Arc::clone(test_case)))); + 
registry.register(Box::new(ProverCompletenessInvariant::new(Arc::clone(test_case)))); registry.register(Box::new(DeterminismInvariant::new(Arc::clone(test_case)))); - registry.register(Box::new(SerializationRoundtripInvariant::new( - Arc::clone(test_case), - default_inputs.to_vec(), - ))); - registry.register(Box::new(ZkConsistencyInvariant::new(Arc::clone( - test_case, - )))); + registry.register(Box::new(SerializationRoundtripInvariant::new(Arc::clone(test_case), default_inputs.to_vec()))); + registry.register(Box::new(ZkConsistencyInvariant::new(Arc::clone(test_case)))); } fn build_objectives( @@ -359,35 +234,12 @@ fn build_objectives( inputs: &[u8], ) -> Vec { vec![ - Objective::PeakRss(PeakRssObjective::new( - Arc::clone(test_case), - Arc::clone(prover_pp), - inputs.to_vec(), - )), - Objective::ProverTime(ProverTimeObjective::new( - Arc::clone(test_case), - Arc::clone(prover_pp), - inputs.to_vec(), - )), - Objective::ProofSize(ProofSizeObjective::new( - Arc::clone(test_case), - Arc::clone(prover_pp), - inputs.to_vec(), - )), - Objective::VerifierTime(VerifierTimeObjective::new( - Arc::clone(test_case), - Arc::clone(prover_pp), - Arc::clone(verifier_pp), - inputs.to_vec(), - )), - Objective::GuestCycleCount(GuestCycleCountObjective::new( - Arc::clone(test_case), - inputs.to_vec(), - )), + Objective::PeakRss(PeakRssObjective::new(Arc::clone(test_case), Arc::clone(prover_pp), inputs.to_vec())), + Objective::ProverTime(ProverTimeObjective::new(Arc::clone(test_case), Arc::clone(prover_pp), inputs.to_vec())), + Objective::ProofSize(ProofSizeObjective::new(Arc::clone(test_case), Arc::clone(prover_pp), inputs.to_vec())), + Objective::VerifierTime(VerifierTimeObjective::new(Arc::clone(test_case), Arc::clone(prover_pp), Arc::clone(verifier_pp), inputs.to_vec())), + Objective::GuestCycleCount(GuestCycleCountObjective::new(Arc::clone(test_case), inputs.to_vec())), Objective::InlineLengths(InlineLengthsObjective::new(Arc::clone(test_case))), - 
Objective::WrappingCost(WrappingCostObjective::new( - Arc::clone(test_case), - Arc::clone(prover_pp), - )), + Objective::WrappingCost(WrappingCostObjective::new(Arc::clone(test_case), Arc::clone(prover_pp))), ] } diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index ce6b66883..aaba9b47c 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -1,5 +1,6 @@ pub mod guest_cycles; pub mod inline_lengths; +pub mod optimize; pub mod peak_rss; pub mod proof_size; pub mod prover_time; diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs new file mode 100644 index 000000000..96c5fca58 --- /dev/null +++ b/jolt-eval/src/objective/optimize.rs @@ -0,0 +1,219 @@ +use std::collections::HashMap; +use std::path::Path; + +use crate::agent::{truncate, AgentHarness}; +use crate::objective::{Direction, OptimizationAttempt}; + +/// Configuration for an optimization run. +pub struct OptimizeConfig { + pub num_iterations: usize, + pub hint: Option, +} + +impl Default for OptimizeConfig { + fn default() -> Self { + Self { + num_iterations: 5, + hint: None, + } + } +} + +/// Result of a complete optimization run. +pub struct OptimizeResult { + pub attempts: Vec, + pub baseline: HashMap, + pub best: HashMap, +} + +/// Environment trait that decouples the optimization loop from side effects. +/// +/// The real binary implements this with actual measurement, invariant +/// checking, and git operations. Tests supply a mock implementation. +pub trait OptimizeEnv { + /// Measure all objectives. Returns name -> value. + fn measure(&mut self) -> HashMap; + + /// Check all invariants. Returns `true` if they all pass. + fn check_invariants(&mut self) -> bool; + + /// Return the direction for each objective (name -> direction). + fn directions(&self) -> HashMap; + + /// Apply an agent-produced diff to the working tree. 
+ fn apply_diff(&mut self, diff: &str); + + /// Called when a change is accepted (measurements improved, invariants passed). + fn accept(&mut self, iteration: usize); + + /// Called when a change is rejected (no improvement, or invariants failed). + fn reject(&mut self); +} + +/// Run an AI-driven optimization loop. +/// +/// Each iteration: +/// 1. Builds a prompt from objective directions, current best measurements, +/// past attempts, and an optional hint. +/// 2. Invokes the agent via [`AgentHarness`]. +/// 3. If the agent produced a diff, applies it via [`OptimizeEnv::apply_diff`]. +/// 4. Re-measures objectives and checks invariants. +/// 5. Accepts or rejects the change. +pub fn auto_optimize( + agent: &dyn AgentHarness, + env: &mut dyn OptimizeEnv, + config: &OptimizeConfig, + repo_dir: &Path, +) -> OptimizeResult { + let directions = env.directions(); + let baseline = env.measure(); + let mut best = baseline.clone(); + let mut attempts = Vec::new(); + + for iteration in 0..config.num_iterations { + let prompt = build_optimize_prompt(&directions, &best, &attempts, config.hint.as_deref()); + + let response = match agent.invoke(repo_dir, &prompt) { + Ok(r) => r, + Err(e) => { + tracing::info!("Agent error: {e}"); + break; + } + }; + + let diff_text = match &response.diff { + Some(d) => { + env.apply_diff(d); + d.clone() + } + None => { + tracing::info!("Agent produced no code changes, stopping."); + break; + } + }; + + let new_measurements = env.measure(); + let invariants_passed = env.check_invariants(); + + if !invariants_passed { + env.reject(); + } + + let improved = if invariants_passed { + directions.iter().any(|(name, dir)| { + let old = best.get(name); + let new = new_measurements.get(name); + match (old, new) { + (Some(&o), Some(&n)) => match dir { + Direction::Minimize => n < o, + Direction::Maximize => n > o, + }, + _ => false, + } + }) + } else { + false + }; + + let attempt = OptimizationAttempt { + description: format!("iteration {}", iteration 
+ 1), + diff: truncate(&diff_text, 5000).to_string(), + measurements: new_measurements.clone(), + invariants_passed, + }; + attempts.push(attempt); + + if improved { + best = new_measurements; + env.accept(iteration + 1); + } else if invariants_passed { + env.reject(); + } + } + + OptimizeResult { + attempts, + baseline, + best, + } +} + +fn build_optimize_prompt( + directions: &HashMap, + current_best: &HashMap, + past_attempts: &[OptimizationAttempt], + hint: Option<&str>, +) -> String { + let mut prompt = String::new(); + + prompt.push_str( + "You are an expert performance engineer optimizing a zkVM (Jolt). \ + Your goal is to make code changes that improve the following objectives.\n\n", + ); + + prompt.push_str("## Objectives to optimize\n\n"); + let mut names: Vec<_> = directions.keys().collect(); + names.sort(); + for name in &names { + let dir = match directions[*name] { + Direction::Minimize => "lower is better", + Direction::Maximize => "higher is better", + }; + let current = current_best + .get(*name) + .map(|v| format!("{v:.4}")) + .unwrap_or_else(|| "unknown".to_string()); + prompt.push_str(&format!( + "- **{name}**: current = {current}, direction = {dir}\n", + )); + } + prompt.push('\n'); + + prompt.push_str( + "## Instructions\n\n\ + 1. Read the relevant source code (especially `jolt-core/src/`) to understand \ + hot paths and potential optimization opportunities.\n\ + 2. Make targeted code changes that you believe will improve the objectives.\n\ + 3. Focus on changes to `jolt-core/` -- do NOT modify `jolt-eval/`.\n\ + 4. Prefer changes that are safe, correct, and unlikely to break invariants.\n\ + 5. Run `cargo clippy -p jolt-core --features host --message-format=short -q` \ + to verify your changes compile.\n\ + 6. 
Summarize what you changed and why you expect it to improve the objectives.\n\n", + ); + + if let Some(h) = hint { + prompt.push_str("## Hint\n\n"); + prompt.push_str(h); + prompt.push_str("\n\n"); + } + + if !past_attempts.is_empty() { + prompt.push_str("## Previous attempts\n\n"); + for attempt in past_attempts { + let status = if attempt.invariants_passed { + "invariants passed" + } else { + "INVARIANTS FAILED" + }; + prompt.push_str(&format!("- **{}** ({}): ", attempt.description, status)); + let mut keys: Vec<_> = attempt.measurements.keys().collect(); + keys.sort(); + for name in keys { + let val = attempt.measurements[name]; + prompt.push_str(&format!("{name}={val:.4} ")); + } + prompt.push('\n'); + } + prompt.push('\n'); + } + + prompt.push_str( + "## Output\n\n\ + Make your code changes directly. After you're done, summarize:\n\ + - What you changed\n\ + - Why you expect improvement\n\ + - Any risks or trade-offs\n", + ); + + prompt +} diff --git a/jolt-eval/tests/agent_test.rs b/jolt-eval/tests/agent_test.rs index b4b03dcac..c16068a6c 100644 --- a/jolt-eval/tests/agent_test.rs +++ b/jolt-eval/tests/agent_test.rs @@ -1,9 +1,12 @@ +use std::collections::HashMap; use std::path::Path; use enumset::EnumSet; use jolt_eval::agent::{AgentError, AgentHarness, AgentResponse, MockAgent}; use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; use jolt_eval::invariant::{Invariant, InvariantViolation, SynthesisTarget}; +use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; +use jolt_eval::objective::Direction; // ========================================================================= // Test invariants @@ -482,3 +485,494 @@ fn custom_harness_plugs_into_auto_redteam() { _ => panic!("Expected NoViolation"), } } + +// ========================================================================= +// Mock OptimizeEnv +// ========================================================================= + +/// Mock 
environment for testing the optimization loop. +/// +/// - `measurements` is a queue of measurement snapshots. Each call to +/// `measure()` pops the front. When one entry remains it repeats. +/// - `invariants_pass` controls whether invariants pass each iteration. +/// - Side effects (apply/accept/reject) are recorded for assertions. +struct MockOptimizeEnv { + directions: HashMap, + measurements: Vec>, + measure_index: usize, + invariants_pass: Vec, + invariant_index: usize, + applied_diffs: Vec, + accepted: Vec, + rejected: usize, +} + +impl MockOptimizeEnv { + fn new(directions: HashMap) -> Self { + Self { + directions, + measurements: vec![], + measure_index: 0, + invariants_pass: vec![true], + invariant_index: 0, + applied_diffs: vec![], + accepted: vec![], + rejected: 0, + } + } + + fn with_measurements(mut self, measurements: Vec>) -> Self { + self.measurements = measurements; + self + } + + fn with_invariants(mut self, pass: Vec) -> Self { + self.invariants_pass = pass; + self + } +} + +impl OptimizeEnv for MockOptimizeEnv { + fn measure(&mut self) -> HashMap { + if self.measurements.is_empty() { + return HashMap::new(); + } + let idx = self.measure_index.min(self.measurements.len() - 1); + self.measure_index += 1; + self.measurements[idx].clone() + } + + fn check_invariants(&mut self) -> bool { + if self.invariants_pass.is_empty() { + return true; + } + let idx = self.invariant_index.min(self.invariants_pass.len() - 1); + self.invariant_index += 1; + self.invariants_pass[idx] + } + + fn directions(&self) -> HashMap { + self.directions.clone() + } + + fn apply_diff(&mut self, diff: &str) { + self.applied_diffs.push(diff.to_string()); + } + + fn accept(&mut self, iteration: usize) { + self.accepted.push(iteration); + } + + fn reject(&mut self) { + self.rejected += 1; + } +} + +fn m(pairs: &[(&str, f64)]) -> HashMap { + pairs.iter().map(|(k, v)| (k.to_string(), *v)).collect() +} + +fn d(pairs: &[(&str, Direction)]) -> HashMap { + pairs.iter().map(|(k, v)| 
(k.to_string(), *v)).collect() +} + +// ========================================================================= +// auto_optimize tests +// ========================================================================= + +#[test] +fn optimize_accepts_improvement() { + let agent = MockAgent::from_responses(vec![Ok(AgentResponse { + text: "I optimized X".into(), + diff: Some("fake diff".into()), + })]); + + let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + .with_measurements(vec![ + m(&[("time", 10.0)]), // baseline + m(&[("time", 8.0)]), // improved + ]); + + let config = OptimizeConfig { + num_iterations: 1, + hint: None, + }; + + let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + assert_eq!(result.attempts.len(), 1); + assert!(result.attempts[0].invariants_passed); + assert_eq!(result.best["time"], 8.0); + assert_eq!(env.accepted, vec![1]); + assert_eq!(env.rejected, 0); + assert_eq!(env.applied_diffs.len(), 1); +} + +#[test] +fn optimize_rejects_regression() { + let agent = MockAgent::from_responses(vec![Ok(AgentResponse { + text: "I tried something".into(), + diff: Some("bad diff".into()), + })]); + + let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + .with_measurements(vec![ + m(&[("time", 10.0)]), // baseline + m(&[("time", 12.0)]), // regression + ]); + + let config = OptimizeConfig { + num_iterations: 1, + hint: None, + }; + + let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + assert_eq!(result.attempts.len(), 1); + assert!(result.attempts[0].invariants_passed); + // Best stays at baseline because regression was rejected + assert_eq!(result.best["time"], 10.0); + assert!(env.accepted.is_empty()); + assert_eq!(env.rejected, 1); +} + +#[test] +fn optimize_rejects_when_invariants_fail() { + let agent = MockAgent::from_responses(vec![Ok(AgentResponse { + text: "I broke something".into(), + diff: Some("breaking diff".into()), + })]); + + let mut env = 
MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + .with_measurements(vec![ + m(&[("time", 10.0)]), // baseline + m(&[("time", 5.0)]), // looks improved but invariants fail + ]) + .with_invariants(vec![false]); + + let config = OptimizeConfig { + num_iterations: 1, + hint: None, + }; + + let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + assert_eq!(result.attempts.len(), 1); + assert!(!result.attempts[0].invariants_passed); + assert_eq!(result.best["time"], 10.0); // rejected despite improvement + assert!(env.accepted.is_empty()); + assert_eq!(env.rejected, 1); // rejected because invariants failed +} + +#[test] +fn optimize_maximize_direction() { + let agent = MockAgent::from_responses(vec![Ok(AgentResponse { + text: "more inlines".into(), + diff: Some("diff".into()), + })]); + + let mut env = MockOptimizeEnv::new(d(&[("inlines", Direction::Maximize)])) + .with_measurements(vec![ + m(&[("inlines", 100.0)]), // baseline + m(&[("inlines", 150.0)]), // improvement (higher is better) + ]); + + let config = OptimizeConfig { + num_iterations: 1, + hint: None, + }; + + let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + assert_eq!(result.best["inlines"], 150.0); + assert_eq!(env.accepted, vec![1]); +} + +#[test] +fn optimize_maximize_rejects_decrease() { + let agent = MockAgent::from_responses(vec![Ok(AgentResponse { + text: "oops".into(), + diff: Some("diff".into()), + })]); + + let mut env = MockOptimizeEnv::new(d(&[("inlines", Direction::Maximize)])) + .with_measurements(vec![ + m(&[("inlines", 100.0)]), + m(&[("inlines", 80.0)]), // regression for Maximize + ]); + + let config = OptimizeConfig { + num_iterations: 1, + hint: None, + }; + + let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + assert_eq!(result.best["inlines"], 100.0); + assert!(env.accepted.is_empty()); + assert_eq!(env.rejected, 1); +} + +#[test] +fn optimize_multi_iteration_progressive_improvement() { + let agent = 
MockAgent::from_responses(vec![ + Ok(AgentResponse { + text: "iter 1".into(), + diff: Some("diff1".into()), + }), + Ok(AgentResponse { + text: "iter 2".into(), + diff: Some("diff2".into()), + }), + Ok(AgentResponse { + text: "iter 3".into(), + diff: Some("diff3".into()), + }), + ]); + + let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + .with_measurements(vec![ + m(&[("time", 10.0)]), // baseline + m(&[("time", 8.0)]), // iter 1: improvement + m(&[("time", 9.0)]), // iter 2: regression from 8.0 + m(&[("time", 6.0)]), // iter 3: improvement from 8.0 + ]); + + let config = OptimizeConfig { + num_iterations: 3, + hint: None, + }; + + let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + assert_eq!(result.attempts.len(), 3); + assert_eq!(result.best["time"], 6.0); + assert_eq!(env.accepted, vec![1, 3]); // iters 1 and 3 accepted + assert_eq!(env.rejected, 1); // iter 2 rejected +} + +#[test] +fn optimize_stops_when_agent_produces_no_diff() { + let agent = MockAgent::from_responses(vec![ + Ok(AgentResponse { + text: "I made a change".into(), + diff: Some("diff1".into()), + }), + Ok(AgentResponse { + text: "I couldn't find anything else".into(), + diff: None, // no diff -> should stop + }), + ]); + + let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + .with_measurements(vec![ + m(&[("time", 10.0)]), + m(&[("time", 9.0)]), + ]); + + let config = OptimizeConfig { + num_iterations: 5, + hint: None, + }; + + let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + // Only 1 attempt recorded (second iteration stopped before measurement) + assert_eq!(result.attempts.len(), 1); +} + +#[test] +fn optimize_stops_when_agent_errors() { + let agent = MockAgent::from_responses(vec![ + Ok(AgentResponse { + text: "change 1".into(), + diff: Some("diff".into()), + }), + Err(AgentError::new("agent crashed")), + ]); + + let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + 
.with_measurements(vec![ + m(&[("time", 10.0)]), + m(&[("time", 10.0)]), + ]); + + let config = OptimizeConfig { + num_iterations: 5, + hint: None, + }; + + let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + assert_eq!(result.attempts.len(), 1); +} + +#[test] +fn optimize_zero_iterations() { + let agent = MockAgent::always_ok("should not be called"); + let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + .with_measurements(vec![m(&[("time", 10.0)])]); + + let config = OptimizeConfig { + num_iterations: 0, + hint: None, + }; + + let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + assert!(result.attempts.is_empty()); + assert_eq!(result.baseline["time"], 10.0); + assert_eq!(result.best["time"], 10.0); + assert!(agent.recorded_prompts().is_empty()); +} + +#[test] +fn optimize_multiple_objectives() { + let agent = MockAgent::from_responses(vec![Ok(AgentResponse { + text: "optimized".into(), + diff: Some("diff".into()), + })]); + + // time improves, size regresses, but any improvement triggers accept + let mut env = MockOptimizeEnv::new(d(&[ + ("time", Direction::Minimize), + ("size", Direction::Minimize), + ])) + .with_measurements(vec![ + m(&[("time", 10.0), ("size", 100.0)]), + m(&[("time", 8.0), ("size", 110.0)]), + ]); + + let config = OptimizeConfig { + num_iterations: 1, + hint: None, + }; + + let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + // Accepted because time improved (even though size regressed) + assert_eq!(env.accepted, vec![1]); + assert_eq!(result.best["time"], 8.0); + assert_eq!(result.best["size"], 110.0); +} + +#[test] +fn optimize_prompt_includes_measurements_and_hint() { + let agent = MockAgent::from_responses(vec![Ok(AgentResponse { + text: "done".into(), + diff: Some("diff".into()), + })]); + + let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + .with_measurements(vec![ + m(&[("time", 42.0)]), + m(&[("time", 42.0)]), + 
]); + + let config = OptimizeConfig { + num_iterations: 1, + hint: Some("Focus on the inner loop".into()), + }; + + auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + let prompts = agent.recorded_prompts(); + assert_eq!(prompts.len(), 1); + assert!(prompts[0].contains("42.0")); + assert!(prompts[0].contains("lower is better")); + assert!(prompts[0].contains("Focus on the inner loop")); +} + +#[test] +fn optimize_prompt_includes_past_attempts() { + let agent = MockAgent::from_responses(vec![ + Ok(AgentResponse { + text: "attempt 1".into(), + diff: Some("d1".into()), + }), + Ok(AgentResponse { + text: "attempt 2".into(), + diff: Some("d2".into()), + }), + ]); + + let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + .with_measurements(vec![ + m(&[("time", 10.0)]), + m(&[("time", 10.0)]), // no improvement + m(&[("time", 10.0)]), + ]); + + let config = OptimizeConfig { + num_iterations: 2, + hint: None, + }; + + auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + let prompts = agent.recorded_prompts(); + assert_eq!(prompts.len(), 2); + // First prompt: no past attempts + assert!(!prompts[0].contains("Previous attempts")); + // Second prompt: includes iteration 1's results + assert!(prompts[1].contains("Previous attempts")); + assert!(prompts[1].contains("iteration 1")); +} + +#[test] +fn optimize_diff_is_applied() { + let agent = MockAgent::from_responses(vec![Ok(AgentResponse { + text: "changed something".into(), + diff: Some("--- a/x\n+++ b/x\n".into()), + })]); + + let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + .with_measurements(vec![ + m(&[("time", 10.0)]), + m(&[("time", 10.0)]), + ]); + + let config = OptimizeConfig { + num_iterations: 1, + hint: None, + }; + + auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + assert_eq!(env.applied_diffs.len(), 1); + assert!(env.applied_diffs[0].contains("--- a/x")); +} + +#[test] +fn optimize_invariant_failure_mid_sequence() { + // 3 
iterations: improve, invariant fail, improve + let agent = MockAgent::from_responses(vec![ + Ok(AgentResponse { text: "i1".into(), diff: Some("d1".into()) }), + Ok(AgentResponse { text: "i2".into(), diff: Some("d2".into()) }), + Ok(AgentResponse { text: "i3".into(), diff: Some("d3".into()) }), + ]); + + let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + .with_measurements(vec![ + m(&[("time", 10.0)]), // baseline + m(&[("time", 8.0)]), // iter 1: improvement + m(&[("time", 5.0)]), // iter 2: would be improvement but invariants fail + m(&[("time", 7.0)]), // iter 3: improvement + ]) + .with_invariants(vec![true, false, true]); + + let config = OptimizeConfig { + num_iterations: 3, + hint: None, + }; + + let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + + assert_eq!(result.attempts.len(), 3); + assert!(result.attempts[0].invariants_passed); + assert!(!result.attempts[1].invariants_passed); + assert!(result.attempts[2].invariants_passed); + assert_eq!(env.accepted, vec![1, 3]); + assert_eq!(env.rejected, 1); // iter 2 rejected (invariant failure) + assert_eq!(result.best["time"], 7.0); +} From 076a17618ac9b499b8c714a0ff214d3db63d65e9 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Tue, 31 Mar 2026 20:27:50 -0400 Subject: [PATCH 09/86] refactor(jolt-eval): replace hardcoded invariant/objective names with single source of truth Adds `BUILTIN_INVARIANT_NAMES` const and `names()`/`names_for_target()` methods to `SynthesisRegistry`. All four CLI binaries now derive their `--list` output and error messages from these instead of duplicating name strings. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/check_invariants.rs | 4 ++-- jolt-eval/bin/fuzz.rs | 18 +++++---------- jolt-eval/bin/optimize.rs | 8 +++---- jolt-eval/bin/redteam.rs | 11 ++++------ jolt-eval/src/invariant/synthesis/mod.rs | 28 ++++++++++++++++++++++++ 5 files changed, 42 insertions(+), 27 deletions(-) diff --git a/jolt-eval/bin/check_invariants.rs b/jolt-eval/bin/check_invariants.rs index 3eb8d4666..bd1cf4491 100644 --- a/jolt-eval/bin/check_invariants.rs +++ b/jolt-eval/bin/check_invariants.rs @@ -8,7 +8,7 @@ use jolt_eval::invariant::completeness_verifier::VerifierCompletenessInvariant; use jolt_eval::invariant::determinism::DeterminismInvariant; use jolt_eval::invariant::serialization_roundtrip::SerializationRoundtripInvariant; use jolt_eval::invariant::soundness::SoundnessInvariant; -use jolt_eval::invariant::synthesis::SynthesisRegistry; +use jolt_eval::invariant::synthesis::{SynthesisRegistry, BUILTIN_INVARIANT_NAMES}; use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; use jolt_eval::invariant::{DynInvariant, InvariantReport}; use jolt_eval::TestCase; @@ -85,7 +85,7 @@ fn main() -> eyre::Result<()> { if invariants.is_empty() { eprintln!("No matching invariants found."); if let Some(name) = &cli.invariant { - eprintln!("Available: soundness, verifier_completeness, prover_completeness, determinism, serialization_roundtrip, zk_consistency"); + eprintln!("Available: {}", BUILTIN_INVARIANT_NAMES.join(", ")); eprintln!("Requested: {name}"); } std::process::exit(1); diff --git a/jolt-eval/bin/fuzz.rs b/jolt-eval/bin/fuzz.rs index 5ddf8fb6b..588b207e1 100644 --- a/jolt-eval/bin/fuzz.rs +++ b/jolt-eval/bin/fuzz.rs @@ -7,7 +7,7 @@ use jolt_eval::invariant::completeness_verifier::VerifierCompletenessInvariant; use jolt_eval::invariant::determinism::DeterminismInvariant; use jolt_eval::invariant::serialization_roundtrip::SerializationRoundtripInvariant; use jolt_eval::invariant::soundness::SoundnessInvariant; -use 
jolt_eval::invariant::synthesis::SynthesisRegistry; +use jolt_eval::invariant::synthesis::{SynthesisRegistry, BUILTIN_INVARIANT_NAMES}; use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; use jolt_eval::invariant::{DynInvariant, InvariantReport, SynthesisTarget}; use jolt_eval::TestCase; @@ -69,8 +69,10 @@ fn main() -> eyre::Result<()> { eprintln!("Error: --elf is required. Provide a pre-compiled guest ELF."); std::process::exit(1); } else { - // --list doesn't need an ELF; use a dummy to populate names - print_available_invariants(); + println!("Fuzzable invariants:"); + for name in BUILTIN_INVARIANT_NAMES { + println!(" {name}"); + } return Ok(()); }; @@ -216,16 +218,6 @@ fn register_invariants( )))); } -fn print_available_invariants() { - println!("Fuzzable invariants:"); - println!(" soundness"); - println!(" verifier_completeness"); - println!(" prover_completeness"); - println!(" determinism"); - println!(" serialization_roundtrip"); - println!(" zk_consistency"); -} - fn print_report(report: &InvariantReport) { if report.failed == 0 { println!( diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 84d305cc9..b757c4186 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -135,13 +135,11 @@ fn main() -> eyre::Result<()> { let verifier_pp = Arc::new(TestCase::verifier_preprocessing(&prover_pp)); let all_objectives = build_objectives(&test_case, &prover_pp, &verifier_pp, &inputs); + let all_names: Vec = all_objectives.iter().map(|o| o.name().to_string()).collect(); let objective_names: Vec = if let Some(names) = &cli.objectives { names.split(',').map(|s| s.trim().to_string()).collect() } else { - all_objectives - .iter() - .map(|o| o.name().to_string()) - .collect() + all_names.clone() }; let objectives: Vec = all_objectives @@ -150,7 +148,7 @@ fn main() -> eyre::Result<()> { .collect(); if objectives.is_empty() { - eprintln!("No matching objectives. 
Available: peak_rss, prover_time, proof_size, verifier_time, guest_cycle_count, inline_lengths, wrapping_cost"); + eprintln!("No matching objectives. Available: {}", all_names.join(", ")); std::process::exit(1); } diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index a90b5f362..cb815bddf 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -10,7 +10,7 @@ use jolt_eval::invariant::determinism::DeterminismInvariant; use jolt_eval::invariant::serialization_roundtrip::SerializationRoundtripInvariant; use jolt_eval::invariant::soundness::SoundnessInvariant; use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; -use jolt_eval::invariant::synthesis::SynthesisRegistry; +use jolt_eval::invariant::synthesis::{SynthesisRegistry, BUILTIN_INVARIANT_NAMES}; use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; use jolt_eval::invariant::SynthesisTarget; use jolt_eval::TestCase; @@ -58,12 +58,9 @@ fn main() -> eyre::Result<()> { if cli.list { println!("Red-teamable invariants:"); - println!(" soundness"); - println!(" verifier_completeness"); - println!(" prover_completeness"); - println!(" determinism"); - println!(" serialization_roundtrip"); - println!(" zk_consistency"); + for name in BUILTIN_INVARIANT_NAMES { + println!(" {name}"); + } return Ok(()); } diff --git a/jolt-eval/src/invariant/synthesis/mod.rs b/jolt-eval/src/invariant/synthesis/mod.rs index 1042aca24..0d7a1877b 100644 --- a/jolt-eval/src/invariant/synthesis/mod.rs +++ b/jolt-eval/src/invariant/synthesis/mod.rs @@ -39,3 +39,31 @@ impl Default for SynthesisRegistry { Self::new() } } + +/// Return all registered invariant names. 
+impl SynthesisRegistry { + pub fn names(&self) -> Vec<&str> { + self.invariants.iter().map(|inv| inv.name()).collect() + } + + pub fn names_for_target(&self, target: SynthesisTarget) -> Vec<&str> { + self.for_target(target) + .iter() + .map(|inv| inv.name()) + .collect() + } +} + +/// Canonical list of built-in Jolt invariant names. +/// +/// This is the single source of truth used by all CLI binaries for +/// `--list` output and error messages. It does not require constructing +/// a `TestCase` or `SynthesisRegistry`. +pub const BUILTIN_INVARIANT_NAMES: &[&str] = &[ + "soundness", + "verifier_completeness", + "prover_completeness", + "determinism", + "serialization_roundtrip", + "zk_consistency", +]; From 6301ac17c9527fd77cc1734a9ed27856d17d229b Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Tue, 31 Mar 2026 20:52:54 -0400 Subject: [PATCH 10/86] refactor(jolt-eval): use inventory crate for invariant and objective registries Each invariant and objective module now calls `inventory::submit!` with an entry struct (`InvariantEntry` / `ObjectiveEntry`) containing the name, metadata, and a factory function. At runtime, `inventory::iter` discovers all registered entries automatically. Key changes: - `InvariantEntry` (name, targets, build fn) + `inventory::collect!` in invariant/mod.rs; each of the 6 invariant files submits one. - `ObjectiveEntry` (name, direction, build fn) + `inventory::collect!` in objective/mod.rs; each of the 7 objective files submits one. - `SynthesisRegistry::from_inventory(test_case, inputs)` replaces manual `register_invariants` functions in all binaries. - `build_objectives_from_inventory(setup, inputs)` and `measure_dyn` replace manual `build_objectives` functions. - `invariant_names()` replaces `BUILTIN_INVARIANT_NAMES` const -- names are derived from the inventory, not hardcoded. - All 5 CLI binaries simplified: no more per-binary registration boilerplate. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/Cargo.toml | 1 + jolt-eval/bin/check_invariants.rs | 41 +------ jolt-eval/bin/fuzz.rs | 65 ++--------- jolt-eval/bin/measure_objectives.rs | 73 ++---------- jolt-eval/bin/optimize.rs | 110 ++++++------------ jolt-eval/bin/redteam.rs | 39 +------ .../src/invariant/completeness_prover.rs | 10 +- .../src/invariant/completeness_verifier.rs | 10 +- jolt-eval/src/invariant/determinism.rs | 10 +- jolt-eval/src/invariant/mod.rs | 20 ++++ .../src/invariant/serialization_roundtrip.rs | 10 +- jolt-eval/src/invariant/soundness.rs | 10 +- jolt-eval/src/invariant/synthesis/mod.rs | 47 ++++---- jolt-eval/src/invariant/zk_consistency.rs | 10 +- jolt-eval/src/objective/guest_cycles.rs | 12 +- jolt-eval/src/objective/inline_lengths.rs | 10 +- jolt-eval/src/objective/mod.rs | 39 +++++++ jolt-eval/src/objective/peak_rss.rs | 12 +- jolt-eval/src/objective/proof_size.rs | 12 +- jolt-eval/src/objective/prover_time.rs | 12 +- jolt-eval/src/objective/verifier_time.rs | 13 ++- jolt-eval/src/objective/wrapping_cost.rs | 12 +- 22 files changed, 279 insertions(+), 299 deletions(-) diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index f8eabe4fc..3b55c4876 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -23,6 +23,7 @@ tracing-subscriber = { workspace = true } arbitrary = { version = "1", features = ["derive"] } enumset = "1" +inventory = { workspace = true } tempfile = "3" jolt-eval-macros = { path = "macros" } diff --git a/jolt-eval/bin/check_invariants.rs b/jolt-eval/bin/check_invariants.rs index bd1cf4491..1914486d5 100644 --- a/jolt-eval/bin/check_invariants.rs +++ b/jolt-eval/bin/check_invariants.rs @@ -3,13 +3,7 @@ use std::sync::Arc; use clap::Parser; use tracing::info; -use jolt_eval::invariant::completeness_prover::ProverCompletenessInvariant; -use jolt_eval::invariant::completeness_verifier::VerifierCompletenessInvariant; -use jolt_eval::invariant::determinism::DeterminismInvariant; -use 
jolt_eval::invariant::serialization_roundtrip::SerializationRoundtripInvariant; -use jolt_eval::invariant::soundness::SoundnessInvariant; -use jolt_eval::invariant::synthesis::{SynthesisRegistry, BUILTIN_INVARIANT_NAMES}; -use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; +use jolt_eval::invariant::synthesis::{invariant_names, SynthesisRegistry}; use jolt_eval::invariant::{DynInvariant, InvariantReport}; use jolt_eval::TestCase; @@ -56,16 +50,10 @@ fn main() -> eyre::Result<()> { }) } else { eprintln!("Error: --elf is required. Provide a pre-compiled guest ELF."); - eprintln!( - "Example: compile with `cargo build -p --release` then pass the ELF path." - ); std::process::exit(1); }; - let default_inputs = vec![]; - - let mut registry = SynthesisRegistry::new(); - register_invariants(&mut registry, &test_case, &default_inputs); + let registry = SynthesisRegistry::from_inventory(test_case, vec![]); let invariants: Vec<&dyn DynInvariant> = if let Some(name) = &cli.invariant { registry @@ -85,7 +73,7 @@ fn main() -> eyre::Result<()> { if invariants.is_empty() { eprintln!("No matching invariants found."); if let Some(name) = &cli.invariant { - eprintln!("Available: {}", BUILTIN_INVARIANT_NAMES.join(", ")); + eprintln!("Available: {}", invariant_names().join(", ")); eprintln!("Requested: {name}"); } std::process::exit(1); @@ -112,29 +100,6 @@ fn main() -> eyre::Result<()> { Ok(()) } -fn register_invariants( - registry: &mut SynthesisRegistry, - test_case: &Arc, - default_inputs: &[u8], -) { - registry.register(Box::new(SoundnessInvariant::new( - Arc::clone(test_case), - default_inputs.to_vec(), - ))); - registry.register(Box::new(VerifierCompletenessInvariant::new(Arc::clone( - test_case, - )))); - registry.register(Box::new(ProverCompletenessInvariant::new(Arc::clone( - test_case, - )))); - registry.register(Box::new(DeterminismInvariant::new(Arc::clone(test_case)))); - registry.register(Box::new(SerializationRoundtripInvariant::new( - 
Arc::clone(test_case), - default_inputs.to_vec(), - ))); - registry.register(Box::new(ZkConsistencyInvariant::new(Arc::clone(test_case)))); -} - fn print_report(report: &InvariantReport) { println!( " {} — {}/{} passed", diff --git a/jolt-eval/bin/fuzz.rs b/jolt-eval/bin/fuzz.rs index 588b207e1..c99cb798e 100644 --- a/jolt-eval/bin/fuzz.rs +++ b/jolt-eval/bin/fuzz.rs @@ -2,13 +2,8 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use clap::Parser; -use jolt_eval::invariant::completeness_prover::ProverCompletenessInvariant; -use jolt_eval::invariant::completeness_verifier::VerifierCompletenessInvariant; -use jolt_eval::invariant::determinism::DeterminismInvariant; -use jolt_eval::invariant::serialization_roundtrip::SerializationRoundtripInvariant; -use jolt_eval::invariant::soundness::SoundnessInvariant; -use jolt_eval::invariant::synthesis::{SynthesisRegistry, BUILTIN_INVARIANT_NAMES}; -use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; + +use jolt_eval::invariant::synthesis::{invariant_names, SynthesisRegistry}; use jolt_eval::invariant::{DynInvariant, InvariantReport, SynthesisTarget}; use jolt_eval::TestCase; @@ -49,6 +44,14 @@ fn main() -> eyre::Result<()> { tracing_subscriber::fmt::init(); let cli = Cli::parse(); + if cli.list { + println!("Fuzzable invariants:"); + for name in invariant_names() { + println!(" {name}"); + } + return Ok(()); + } + let test_case = if let Some(elf_path) = &cli.elf { let elf_bytes = std::fs::read(elf_path)?; let memory_config = common::jolt_device::MemoryConfig { @@ -65,27 +68,12 @@ fn main() -> eyre::Result<()> { memory_config, max_trace_length: cli.max_trace_length, }) - } else if !cli.list { + } else { eprintln!("Error: --elf is required. 
Provide a pre-compiled guest ELF."); std::process::exit(1); - } else { - println!("Fuzzable invariants:"); - for name in BUILTIN_INVARIANT_NAMES { - println!(" {name}"); - } - return Ok(()); }; - let default_inputs = vec![]; - let mut registry = SynthesisRegistry::new(); - register_invariants(&mut registry, &test_case, &default_inputs); - - if cli.list { - for inv in registry.for_target(SynthesisTarget::Fuzz) { - println!(" {}", inv.name()); - } - return Ok(()); - } + let registry = SynthesisRegistry::from_inventory(test_case, vec![]); let fuzzable: Vec<&dyn DynInvariant> = if let Some(name) = &cli.invariant { let matches: Vec<_> = registry @@ -134,14 +122,10 @@ fn main() -> eyre::Result<()> { for inv in &fuzzable { println!(" {} — setting up...", inv.name()); - // DynInvariant::run_checks handles setup internally, but for a fuzz - // loop we want to amortize setup across many iterations. Use run_checks - // in batches. let per_invariant = cli.iterations / fuzzable.len(); let mut checks = 0usize; let mut violations = Vec::new(); - // Run in batches so we can check the deadline between batches let batch_size = per_invariant.min(100); let mut remaining = per_invariant; @@ -193,31 +177,6 @@ fn main() -> eyre::Result<()> { Ok(()) } -fn register_invariants( - registry: &mut SynthesisRegistry, - test_case: &Arc, - default_inputs: &[u8], -) { - registry.register(Box::new(SoundnessInvariant::new( - Arc::clone(test_case), - default_inputs.to_vec(), - ))); - registry.register(Box::new(VerifierCompletenessInvariant::new(Arc::clone( - test_case, - )))); - registry.register(Box::new(ProverCompletenessInvariant::new(Arc::clone( - test_case, - )))); - registry.register(Box::new(DeterminismInvariant::new(Arc::clone(test_case)))); - registry.register(Box::new(SerializationRoundtripInvariant::new( - Arc::clone(test_case), - default_inputs.to_vec(), - ))); - registry.register(Box::new(ZkConsistencyInvariant::new(Arc::clone( - test_case, - )))); -} - fn print_report(report: 
&InvariantReport) { if report.failed == 0 { println!( diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs index cb7c78ef7..67ae35731 100644 --- a/jolt-eval/bin/measure_objectives.rs +++ b/jolt-eval/bin/measure_objectives.rs @@ -1,16 +1,7 @@ -use std::sync::Arc; - use clap::Parser; -use jolt_eval::objective::guest_cycles::GuestCycleCountObjective; -use jolt_eval::objective::inline_lengths::InlineLengthsObjective; -use jolt_eval::objective::peak_rss::PeakRssObjective; -use jolt_eval::objective::proof_size::ProofSizeObjective; -use jolt_eval::objective::prover_time::ProverTimeObjective; -use jolt_eval::objective::verifier_time::VerifierTimeObjective; -use jolt_eval::objective::wrapping_cost::WrappingCostObjective; -use jolt_eval::objective::Objective; -use jolt_eval::TestCase; +use jolt_eval::objective::{build_objectives_from_inventory, registered_objectives}; +use jolt_eval::{SharedSetup, TestCase}; #[derive(Parser)] #[command(name = "measure-objectives")] @@ -48,33 +39,31 @@ fn main() -> eyre::Result<()> { heap_size: 32768, program_size: None, }; - Arc::new(TestCase { + TestCase { elf_contents: elf_bytes, memory_config, max_trace_length: cli.max_trace_length, - }) + } } else { eprintln!("Error: --elf is required. 
Provide a pre-compiled guest ELF."); std::process::exit(1); }; - let inputs = vec![]; - let prover_pp = Arc::new(test_case.prover_preprocessing()); - let verifier_pp = Arc::new(TestCase::verifier_preprocessing(&prover_pp)); - - let objectives = build_objectives(&test_case, &prover_pp, &verifier_pp, &inputs); + let setup = SharedSetup::new(test_case); + let objectives = build_objectives_from_inventory(&setup, vec![]); - let filtered: Vec<&Objective> = if let Some(name) = &cli.objective { + let filtered: Vec<_> = if let Some(name) = &cli.objective { objectives - .iter() + .into_iter() .filter(|o| o.name() == name.as_str()) .collect() } else { - objectives.iter().collect() + objectives }; if filtered.is_empty() { - eprintln!("No matching objectives found."); + let all_names: Vec<_> = registered_objectives().map(|e| e.name).collect(); + eprintln!("No matching objectives. Available: {}", all_names.join(", ")); std::process::exit(1); } @@ -107,43 +96,3 @@ fn main() -> eyre::Result<()> { Ok(()) } - -fn build_objectives( - test_case: &Arc, - prover_pp: &Arc, - verifier_pp: &Arc, - inputs: &[u8], -) -> Vec { - vec![ - Objective::PeakRss(PeakRssObjective::new( - Arc::clone(test_case), - Arc::clone(prover_pp), - inputs.to_vec(), - )), - Objective::ProverTime(ProverTimeObjective::new( - Arc::clone(test_case), - Arc::clone(prover_pp), - inputs.to_vec(), - )), - Objective::ProofSize(ProofSizeObjective::new( - Arc::clone(test_case), - Arc::clone(prover_pp), - inputs.to_vec(), - )), - Objective::VerifierTime(VerifierTimeObjective::new( - Arc::clone(test_case), - Arc::clone(prover_pp), - Arc::clone(verifier_pp), - inputs.to_vec(), - )), - Objective::GuestCycleCount(GuestCycleCountObjective::new( - Arc::clone(test_case), - inputs.to_vec(), - )), - Objective::InlineLengths(InlineLengthsObjective::new(Arc::clone(test_case))), - Objective::WrappingCost(WrappingCostObjective::new( - Arc::clone(test_case), - Arc::clone(prover_pp), - )), - ] -} diff --git a/jolt-eval/bin/optimize.rs 
b/jolt-eval/bin/optimize.rs index b757c4186..392bbf568 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -5,23 +5,12 @@ use std::sync::Arc; use clap::Parser; use jolt_eval::agent::ClaudeCodeAgent; -use jolt_eval::invariant::completeness_prover::ProverCompletenessInvariant; -use jolt_eval::invariant::completeness_verifier::VerifierCompletenessInvariant; -use jolt_eval::invariant::determinism::DeterminismInvariant; -use jolt_eval::invariant::serialization_roundtrip::SerializationRoundtripInvariant; -use jolt_eval::invariant::soundness::SoundnessInvariant; use jolt_eval::invariant::synthesis::SynthesisRegistry; -use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; -use jolt_eval::objective::guest_cycles::GuestCycleCountObjective; -use jolt_eval::objective::inline_lengths::InlineLengthsObjective; use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; -use jolt_eval::objective::peak_rss::PeakRssObjective; -use jolt_eval::objective::proof_size::ProofSizeObjective; -use jolt_eval::objective::prover_time::ProverTimeObjective; -use jolt_eval::objective::verifier_time::VerifierTimeObjective; -use jolt_eval::objective::wrapping_cost::WrappingCostObjective; -use jolt_eval::objective::{measure_objectives, Direction, Objective}; -use jolt_eval::TestCase; +use jolt_eval::objective::{ + build_objectives_from_inventory, measure_dyn, AbstractObjective, Direction, +}; +use jolt_eval::{SharedSetup, TestCase}; #[derive(Parser)] #[command(name = "optimize")] @@ -56,16 +45,15 @@ struct Cli { hint: Option, } -/// Real environment backed by Jolt objectives, invariants, and git. 
struct RealEnv { - objectives: Vec, + objectives: Vec>, registry: SynthesisRegistry, repo_dir: std::path::PathBuf, } impl OptimizeEnv for RealEnv { fn measure(&mut self) -> HashMap { - measure_objectives(&self.objectives) + measure_dyn(&self.objectives) } fn check_invariants(&mut self) -> bool { @@ -124,49 +112,47 @@ fn main() -> eyre::Result<()> { heap_size: 32768, program_size: None, }; - let test_case = Arc::new(TestCase { + let test_case = TestCase { elf_contents: elf_bytes, memory_config, max_trace_length: cli.max_trace_length, - }); - - let inputs = vec![]; - let prover_pp = Arc::new(test_case.prover_preprocessing()); - let verifier_pp = Arc::new(TestCase::verifier_preprocessing(&prover_pp)); + }; - let all_objectives = build_objectives(&test_case, &prover_pp, &verifier_pp, &inputs); + let setup = SharedSetup::new(test_case); + let all_objectives = build_objectives_from_inventory(&setup, vec![]); let all_names: Vec = all_objectives.iter().map(|o| o.name().to_string()).collect(); - let objective_names: Vec = if let Some(names) = &cli.objectives { - names.split(',').map(|s| s.trim().to_string()).collect() + + let filter_names: Option> = cli + .objectives + .as_ref() + .map(|s| s.split(',').map(|n| n.trim().to_string()).collect()); + + let objectives: Vec> = if let Some(names) = &filter_names { + all_objectives + .into_iter() + .filter(|o| names.contains(&o.name().to_string())) + .collect() } else { - all_names.clone() + all_objectives }; - let objectives: Vec = all_objectives - .into_iter() - .filter(|o| objective_names.contains(&o.name().to_string())) - .collect(); - if objectives.is_empty() { eprintln!("No matching objectives. 
Available: {}", all_names.join(", ")); std::process::exit(1); } - let default_inputs = vec![]; - let mut registry = SynthesisRegistry::new(); - register_invariants(&mut registry, &test_case, &default_inputs); - - let repo_dir = std::env::current_dir()?; - let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); - let config = OptimizeConfig { - num_iterations: cli.iterations, - hint: cli.hint.clone(), + let test_case2 = TestCase { + elf_contents: std::fs::read(&cli.elf)?, + memory_config, + max_trace_length: cli.max_trace_length, }; + let registry = SynthesisRegistry::from_inventory(Arc::new(test_case2), vec![]); + let repo_dir = std::env::current_dir()?; let mut env = RealEnv { objectives, registry, - repo_dir, + repo_dir: repo_dir.clone(), }; println!("=== Baseline measurements ==="); @@ -174,7 +160,13 @@ fn main() -> eyre::Result<()> { print_measurements(&env.directions(), &baseline); println!(); - let result = auto_optimize(&agent, &mut env, &config, &std::env::current_dir()?); + let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); + let config = OptimizeConfig { + num_iterations: cli.iterations, + hint: cli.hint.clone(), + }; + + let result = auto_optimize(&agent, &mut env, &config, &repo_dir); println!("=== Optimization summary ==="); println!( @@ -211,33 +203,3 @@ fn print_measurements(directions: &HashMap, measurements: &Ha println!(" {:<25} {:>15} {:>6}", name, val, dir); } } - -fn register_invariants( - registry: &mut SynthesisRegistry, - test_case: &Arc, - default_inputs: &[u8], -) { - registry.register(Box::new(SoundnessInvariant::new(Arc::clone(test_case), default_inputs.to_vec()))); - registry.register(Box::new(VerifierCompletenessInvariant::new(Arc::clone(test_case)))); - registry.register(Box::new(ProverCompletenessInvariant::new(Arc::clone(test_case)))); - registry.register(Box::new(DeterminismInvariant::new(Arc::clone(test_case)))); - registry.register(Box::new(SerializationRoundtripInvariant::new(Arc::clone(test_case), 
default_inputs.to_vec()))); - registry.register(Box::new(ZkConsistencyInvariant::new(Arc::clone(test_case)))); -} - -fn build_objectives( - test_case: &Arc, - prover_pp: &Arc, - verifier_pp: &Arc, - inputs: &[u8], -) -> Vec { - vec![ - Objective::PeakRss(PeakRssObjective::new(Arc::clone(test_case), Arc::clone(prover_pp), inputs.to_vec())), - Objective::ProverTime(ProverTimeObjective::new(Arc::clone(test_case), Arc::clone(prover_pp), inputs.to_vec())), - Objective::ProofSize(ProofSizeObjective::new(Arc::clone(test_case), Arc::clone(prover_pp), inputs.to_vec())), - Objective::VerifierTime(VerifierTimeObjective::new(Arc::clone(test_case), Arc::clone(prover_pp), Arc::clone(verifier_pp), inputs.to_vec())), - Objective::GuestCycleCount(GuestCycleCountObjective::new(Arc::clone(test_case), inputs.to_vec())), - Objective::InlineLengths(InlineLengthsObjective::new(Arc::clone(test_case))), - Objective::WrappingCost(WrappingCostObjective::new(Arc::clone(test_case), Arc::clone(prover_pp))), - ] -} diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index cb815bddf..edd625713 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -4,14 +4,8 @@ use clap::Parser; use tracing::info; use jolt_eval::agent::ClaudeCodeAgent; -use jolt_eval::invariant::completeness_prover::ProverCompletenessInvariant; -use jolt_eval::invariant::completeness_verifier::VerifierCompletenessInvariant; -use jolt_eval::invariant::determinism::DeterminismInvariant; -use jolt_eval::invariant::serialization_roundtrip::SerializationRoundtripInvariant; -use jolt_eval::invariant::soundness::SoundnessInvariant; use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; -use jolt_eval::invariant::synthesis::{SynthesisRegistry, BUILTIN_INVARIANT_NAMES}; -use jolt_eval::invariant::zk_consistency::ZkConsistencyInvariant; +use jolt_eval::invariant::synthesis::{invariant_names, SynthesisRegistry}; use jolt_eval::invariant::SynthesisTarget; use 
jolt_eval::TestCase; @@ -58,7 +52,7 @@ fn main() -> eyre::Result<()> { if cli.list { println!("Red-teamable invariants:"); - for name in BUILTIN_INVARIANT_NAMES { + for name in invariant_names() { println!(" {name}"); } return Ok(()); @@ -80,9 +74,7 @@ fn main() -> eyre::Result<()> { max_trace_length: cli.max_trace_length, }); - let default_inputs = vec![]; - let mut registry = SynthesisRegistry::new(); - register_invariants(&mut registry, &test_case, &default_inputs); + let registry = SynthesisRegistry::from_inventory(test_case, vec![]); let invariant = registry .for_target(SynthesisTarget::RedTeam) @@ -135,28 +127,3 @@ fn main() -> eyre::Result<()> { Ok(()) } - -fn register_invariants( - registry: &mut SynthesisRegistry, - test_case: &Arc, - default_inputs: &[u8], -) { - registry.register(Box::new(SoundnessInvariant::new( - Arc::clone(test_case), - default_inputs.to_vec(), - ))); - registry.register(Box::new(VerifierCompletenessInvariant::new(Arc::clone( - test_case, - )))); - registry.register(Box::new(ProverCompletenessInvariant::new(Arc::clone( - test_case, - )))); - registry.register(Box::new(DeterminismInvariant::new(Arc::clone(test_case)))); - registry.register(Box::new(SerializationRoundtripInvariant::new( - Arc::clone(test_case), - default_inputs.to_vec(), - ))); - registry.register(Box::new(ZkConsistencyInvariant::new(Arc::clone( - test_case, - )))); -} diff --git a/jolt-eval/src/invariant/completeness_prover.rs b/jolt-eval/src/invariant/completeness_prover.rs index cb405e904..6da36839d 100644 --- a/jolt-eval/src/invariant/completeness_prover.rs +++ b/jolt-eval/src/invariant/completeness_prover.rs @@ -3,9 +3,17 @@ use std::sync::Arc; use arbitrary::Arbitrary; use enumset::EnumSet; -use super::{Invariant, InvariantViolation, SynthesisTarget}; +use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; use crate::{ProverPreprocessing, TestCase}; +inventory::submit! 
{ + InvariantEntry { + name: "prover_completeness", + targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, + build: |tc, _inputs| Box::new(ProverCompletenessInvariant::new(tc)), + } +} + /// Prover completeness: for a fixed program, input, and valid size parameters, /// the prover should produce a proof without panicking. pub struct ProverCompletenessInvariant { diff --git a/jolt-eval/src/invariant/completeness_verifier.rs b/jolt-eval/src/invariant/completeness_verifier.rs index 4ab56d98f..e9030edf0 100644 --- a/jolt-eval/src/invariant/completeness_verifier.rs +++ b/jolt-eval/src/invariant/completeness_verifier.rs @@ -3,9 +3,17 @@ use std::sync::Arc; use arbitrary::Arbitrary; use enumset::EnumSet; -use super::{Invariant, InvariantViolation, SynthesisTarget}; +use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; use crate::{ProverPreprocessing, TestCase, VerifierPreprocessing}; +inventory::submit! { + InvariantEntry { + name: "verifier_completeness", + targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, + build: |tc, _inputs| Box::new(VerifierCompletenessInvariant::new(tc)), + } +} + /// Verifier completeness: for a fixed program and honest prover output/proof, /// the verifier accepts the honest output/proof. pub struct VerifierCompletenessInvariant { diff --git a/jolt-eval/src/invariant/determinism.rs b/jolt-eval/src/invariant/determinism.rs index f5ce761dd..12db393ee 100644 --- a/jolt-eval/src/invariant/determinism.rs +++ b/jolt-eval/src/invariant/determinism.rs @@ -3,9 +3,17 @@ use std::sync::Arc; use arbitrary::Arbitrary; use enumset::EnumSet; -use super::{Invariant, InvariantViolation, SynthesisTarget}; +use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; use crate::{serialize_proof, ProverPreprocessing, TestCase}; +inventory::submit! 
{ + InvariantEntry { + name: "determinism", + targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, + build: |tc, _inputs| Box::new(DeterminismInvariant::new(tc)), + } +} + /// Determinism invariant: same program + input must produce byte-identical proofs. pub struct DeterminismInvariant { pub test_case: Arc, diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index 0a4245a38..c9d04dd2e 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -7,11 +7,14 @@ pub mod synthesis; pub mod zk_consistency; use std::fmt; +use std::sync::Arc; use arbitrary::Arbitrary; use enumset::{EnumSet, EnumSetType}; use rand::RngCore; +use crate::TestCase; + /// What to synthesize from an invariant definition. #[derive(Debug, EnumSetType)] pub enum SynthesisTarget { @@ -81,6 +84,23 @@ pub trait Invariant: Send + Sync { } } +/// Registration entry for the [`inventory`] crate. +/// +/// Each built-in invariant module calls `inventory::submit!` with one of +/// these, so all invariants are discoverable at runtime without manual +/// registration. +pub struct InvariantEntry { + pub name: &'static str, + pub targets: fn() -> EnumSet, + pub build: fn(Arc, Vec) -> Box, +} +inventory::collect!(InvariantEntry); + +/// Iterate all invariant entries registered via `inventory`. +pub fn registered_invariants() -> impl Iterator { + inventory::iter::() +} + /// A counterexample produced when an invariant is violated. 
pub struct InvariantCounterexample { pub description: String, diff --git a/jolt-eval/src/invariant/serialization_roundtrip.rs b/jolt-eval/src/invariant/serialization_roundtrip.rs index 9dd00663d..434dfd6cc 100644 --- a/jolt-eval/src/invariant/serialization_roundtrip.rs +++ b/jolt-eval/src/invariant/serialization_roundtrip.rs @@ -3,9 +3,17 @@ use std::sync::Arc; use arbitrary::Arbitrary; use enumset::EnumSet; -use super::{Invariant, InvariantViolation, SynthesisTarget}; +use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; use crate::{deserialize_proof, serialize_proof, TestCase}; +inventory::submit! { + InvariantEntry { + name: "serialization_roundtrip", + targets: || { SynthesisTarget::Test.into() }, + build: |tc, inputs| Box::new(SerializationRoundtripInvariant::new(tc, inputs)), + } +} + /// Serialization roundtrip invariant: `deserialize(serialize(proof)) == proof`, /// verified by checking that re-serialization produces identical bytes. pub struct SerializationRoundtripInvariant { diff --git a/jolt-eval/src/invariant/soundness.rs b/jolt-eval/src/invariant/soundness.rs index f70ea1bf0..70aa53177 100644 --- a/jolt-eval/src/invariant/soundness.rs +++ b/jolt-eval/src/invariant/soundness.rs @@ -3,9 +3,17 @@ use std::sync::Arc; use arbitrary::Arbitrary; use enumset::EnumSet; -use super::{Invariant, InvariantViolation, SynthesisTarget}; +use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; use crate::{serialize_proof, JoltDevice, Proof, TestCase, VerifierPreprocessing}; +inventory::submit! { + InvariantEntry { + name: "soundness", + targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz | SynthesisTarget::RedTeam, + build: |tc, inputs| Box::new(SoundnessInvariant::new(tc, inputs)), + } +} + /// Mutation applied to a serialized proof to test soundness. 
#[derive(Debug, Clone, Arbitrary)] pub struct ProofMutation { diff --git a/jolt-eval/src/invariant/synthesis/mod.rs b/jolt-eval/src/invariant/synthesis/mod.rs index 0d7a1877b..1c6d74f70 100644 --- a/jolt-eval/src/invariant/synthesis/mod.rs +++ b/jolt-eval/src/invariant/synthesis/mod.rs @@ -2,7 +2,10 @@ pub mod fuzz; pub mod redteam; pub mod test; -use super::{DynInvariant, SynthesisTarget}; +use std::sync::Arc; + +use super::{registered_invariants, DynInvariant, SynthesisTarget}; +use crate::TestCase; /// Registry of invariants available for synthesis. pub struct SynthesisRegistry { @@ -16,6 +19,15 @@ impl SynthesisRegistry { } } + /// Build a registry from all `inventory`-registered invariants. + pub fn from_inventory(test_case: Arc, default_inputs: Vec) -> Self { + let mut registry = Self::new(); + for entry in registered_invariants() { + registry.register((entry.build)(Arc::clone(&test_case), default_inputs.clone())); + } + registry + } + pub fn register(&mut self, invariant: Box) { self.invariants.push(invariant); } @@ -32,16 +44,7 @@ impl SynthesisRegistry { .map(|inv| inv.as_ref()) .collect() } -} -impl Default for SynthesisRegistry { - fn default() -> Self { - Self::new() - } -} - -/// Return all registered invariant names. -impl SynthesisRegistry { pub fn names(&self) -> Vec<&str> { self.invariants.iter().map(|inv| inv.name()).collect() } @@ -54,16 +57,14 @@ impl SynthesisRegistry { } } -/// Canonical list of built-in Jolt invariant names. -/// -/// This is the single source of truth used by all CLI binaries for -/// `--list` output and error messages. It does not require constructing -/// a `TestCase` or `SynthesisRegistry`. -pub const BUILTIN_INVARIANT_NAMES: &[&str] = &[ - "soundness", - "verifier_completeness", - "prover_completeness", - "determinism", - "serialization_roundtrip", - "zk_consistency", -]; +impl Default for SynthesisRegistry { + fn default() -> Self { + Self::new() + } +} + +/// Return the names of all `inventory`-registered invariants. 
+/// Does not require a `TestCase`. +pub fn invariant_names() -> Vec<&'static str> { + registered_invariants().map(|e| e.name).collect() +} diff --git a/jolt-eval/src/invariant/zk_consistency.rs b/jolt-eval/src/invariant/zk_consistency.rs index 2bef70d42..61112b924 100644 --- a/jolt-eval/src/invariant/zk_consistency.rs +++ b/jolt-eval/src/invariant/zk_consistency.rs @@ -3,9 +3,17 @@ use std::sync::Arc; use arbitrary::Arbitrary; use enumset::EnumSet; -use super::{Invariant, InvariantViolation, SynthesisTarget}; +use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; use crate::{ProverPreprocessing, TestCase, VerifierPreprocessing}; +inventory::submit! { + InvariantEntry { + name: "zk_consistency", + targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, + build: |tc, _inputs| Box::new(ZkConsistencyInvariant::new(tc)), + } +} + /// ZK consistency invariant: both `host` and `host,zk` compilation modes /// produce valid proofs that pass verification. /// diff --git a/jolt-eval/src/objective/guest_cycles.rs b/jolt-eval/src/objective/guest_cycles.rs index 40ff488a3..5fcd4f269 100644 --- a/jolt-eval/src/objective/guest_cycles.rs +++ b/jolt-eval/src/objective/guest_cycles.rs @@ -1,8 +1,18 @@ use std::sync::Arc; -use super::{AbstractObjective, Direction, MeasurementError}; +use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; use crate::TestCase; +inventory::submit! { + ObjectiveEntry { + name: "guest_cycle_count", + direction: Direction::Minimize, + build: |setup, inputs| Box::new(GuestCycleCountObjective::new( + setup.test_case.clone(), inputs, + )), + } +} + /// Measures guest instruction cycle count via program tracing. 
pub struct GuestCycleCountObjective { pub test_case: Arc, diff --git a/jolt-eval/src/objective/inline_lengths.rs b/jolt-eval/src/objective/inline_lengths.rs index 54136a3b1..50ccd446c 100644 --- a/jolt-eval/src/objective/inline_lengths.rs +++ b/jolt-eval/src/objective/inline_lengths.rs @@ -1,8 +1,16 @@ use std::sync::Arc; -use super::{AbstractObjective, Direction, MeasurementError}; +use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; use crate::TestCase; +inventory::submit! { + ObjectiveEntry { + name: "inline_lengths", + direction: Direction::Maximize, + build: |setup, _inputs| Box::new(InlineLengthsObjective::new(setup.test_case.clone())), + } +} + /// Measures total virtual/inline sequence length in the decoded bytecode. /// /// Inline sequences replace guest-side computation with constraint-native diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index aaba9b47c..8f1c16f9a 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -10,6 +10,8 @@ pub mod wrapping_cost; use std::collections::HashMap; use std::fmt; +use crate::SharedSetup; + /// Whether lower or higher values are better. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Direction { @@ -59,6 +61,43 @@ pub trait AbstractObjective: Send + Sync { fn direction(&self) -> Direction; } +/// Registration entry for the [`inventory`] crate. +/// +/// Each built-in objective module calls `inventory::submit!` with one of +/// these, so all objectives are discoverable at runtime. +pub struct ObjectiveEntry { + pub name: &'static str, + pub direction: Direction, + pub build: fn(&SharedSetup, Vec) -> Box, +} +inventory::collect!(ObjectiveEntry); + +/// Iterate all objective entries registered via `inventory`. +pub fn registered_objectives() -> impl Iterator { + inventory::iter::() +} + +/// Build all registered objectives from a [`SharedSetup`]. 
+pub fn build_objectives_from_inventory( + setup: &SharedSetup, + inputs: Vec, +) -> Vec> { + inventory::iter::() + .map(|entry| (entry.build)(setup, inputs.clone())) + .collect() +} + +/// Measure a list of trait-object objectives. +pub fn measure_dyn(objectives: &[Box]) -> HashMap { + objectives + .iter() + .filter_map(|obj| { + let name = obj.name().to_string(); + obj.collect_measurement().ok().map(|v| (name, v)) + }) + .collect() +} + /// Centralized objective enum dispatching to concrete implementations. pub enum Objective { PeakRss(peak_rss::PeakRssObjective), diff --git a/jolt-eval/src/objective/peak_rss.rs b/jolt-eval/src/objective/peak_rss.rs index ca49f924a..84af1da60 100644 --- a/jolt-eval/src/objective/peak_rss.rs +++ b/jolt-eval/src/objective/peak_rss.rs @@ -2,9 +2,19 @@ use std::sync::Arc; use sysinfo::{Pid, System}; -use super::{AbstractObjective, Direction, MeasurementError}; +use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; use crate::{ProverPreprocessing, TestCase}; +inventory::submit! { + ObjectiveEntry { + name: "peak_rss", + direction: Direction::Minimize, + build: |setup, inputs| Box::new(PeakRssObjective::new( + setup.test_case.clone(), setup.prover_preprocessing.clone(), inputs, + )), + } +} + /// Measures peak resident set size (RSS) during proving. /// /// Uses the `sysinfo` crate to sample memory before and after proving. diff --git a/jolt-eval/src/objective/proof_size.rs b/jolt-eval/src/objective/proof_size.rs index 29211c2db..b8214a643 100644 --- a/jolt-eval/src/objective/proof_size.rs +++ b/jolt-eval/src/objective/proof_size.rs @@ -1,8 +1,18 @@ use std::sync::Arc; -use super::{AbstractObjective, Direction, MeasurementError}; +use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; use crate::{serialize_proof, ProverPreprocessing, TestCase}; +inventory::submit! 
{ + ObjectiveEntry { + name: "proof_size", + direction: Direction::Minimize, + build: |setup, inputs| Box::new(ProofSizeObjective::new( + setup.test_case.clone(), setup.prover_preprocessing.clone(), inputs, + )), + } +} + /// Measures serialized proof size in bytes. pub struct ProofSizeObjective { pub test_case: Arc, diff --git a/jolt-eval/src/objective/prover_time.rs b/jolt-eval/src/objective/prover_time.rs index 7b839f576..c29c61e4d 100644 --- a/jolt-eval/src/objective/prover_time.rs +++ b/jolt-eval/src/objective/prover_time.rs @@ -1,9 +1,19 @@ use std::sync::Arc; use std::time::Instant; -use super::{AbstractObjective, Direction, MeasurementError}; +use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; use crate::{ProverPreprocessing, TestCase}; +inventory::submit! { + ObjectiveEntry { + name: "prover_time", + direction: Direction::Minimize, + build: |setup, inputs| Box::new(ProverTimeObjective::new( + setup.test_case.clone(), setup.prover_preprocessing.clone(), inputs, + )), + } +} + /// Measures wall-clock prover time in seconds. pub struct ProverTimeObjective { pub test_case: Arc, diff --git a/jolt-eval/src/objective/verifier_time.rs b/jolt-eval/src/objective/verifier_time.rs index 1223f95a9..34e76c443 100644 --- a/jolt-eval/src/objective/verifier_time.rs +++ b/jolt-eval/src/objective/verifier_time.rs @@ -1,9 +1,20 @@ use std::sync::Arc; use std::time::Instant; -use super::{AbstractObjective, Direction, MeasurementError}; +use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; use crate::{ProverPreprocessing, TestCase, VerifierPreprocessing}; +inventory::submit! { + ObjectiveEntry { + name: "verifier_time", + direction: Direction::Minimize, + build: |setup, inputs| Box::new(VerifierTimeObjective::new( + setup.test_case.clone(), setup.prover_preprocessing.clone(), + setup.verifier_preprocessing.clone(), inputs, + )), + } +} + /// Measures wall-clock verifier time in seconds. 
pub struct VerifierTimeObjective { pub test_case: Arc, diff --git a/jolt-eval/src/objective/wrapping_cost.rs b/jolt-eval/src/objective/wrapping_cost.rs index dfcddb924..3a5eef453 100644 --- a/jolt-eval/src/objective/wrapping_cost.rs +++ b/jolt-eval/src/objective/wrapping_cost.rs @@ -1,8 +1,18 @@ use std::sync::Arc; -use super::{AbstractObjective, Direction, MeasurementError}; +use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; use crate::{ProverPreprocessing, TestCase}; +inventory::submit! { + ObjectiveEntry { + name: "wrapping_cost", + direction: Direction::Minimize, + build: |setup, _inputs| Box::new(WrappingCostObjective::new( + setup.test_case.clone(), setup.prover_preprocessing.clone(), + )), + } +} + /// Measures the "wrapping cost" as the total number of constraints in the R1CS. /// /// This is derived from the preprocessing data which encodes the constraint From 6470e7231c944d0366e4e13bf41596c2fd648f06 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Wed, 1 Apr 2026 10:38:25 -0400 Subject: [PATCH 11/86] feat(jolt-eval): parse agent counterexamples as JSON in auto_redteam MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit auto_redteam now asks the agent to produce a concrete counterexample as a JSON code block, deserializes it into the invariant's Input type via serde, and runs Invariant::check on that specific input. Key changes: - Invariant::Input gains Serialize + DeserializeOwned bounds; Invariant::Setup gains 'static. All 6 built-in Input types derive Serialize + Deserialize. - DynInvariant gains three methods: input_json_example() — serializes a seed-corpus item for the prompt dyn_setup() — type-erased setup (Box, call once) check_json_input() — deserialize JSON → Input, run check() - CheckJsonResult enum distinguishes Pass / Violation / BadInput (deserialization failure) so parse errors are never mistaken for real counterexamples. 
- auto_redteam builds a prompt that includes the JSON schema example and asks the agent to end with a ```json code block. The loop extracts JSON via extract_json(), feeds it through check_json_input, and returns Violation with the input_json on success. - RedTeamResult::Violation now carries input_json alongside approach and error. - RedTeamConfig trimmed: num_fuzz_per_iteration removed (the loop checks the single agent-produced input, not random fuzz). Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/Cargo.toml | 1 + jolt-eval/bin/optimize.rs | 17 +- jolt-eval/bin/redteam.rs | 20 +- .../src/invariant/completeness_prover.rs | 2 +- .../src/invariant/completeness_verifier.rs | 2 +- jolt-eval/src/invariant/determinism.rs | 2 +- jolt-eval/src/invariant/mod.rs | 113 ++++++++++- .../src/invariant/serialization_roundtrip.rs | 2 +- jolt-eval/src/invariant/soundness.rs | 2 +- jolt-eval/src/invariant/synthesis/redteam.rs | 142 +++++++++----- jolt-eval/src/invariant/zk_consistency.rs | 2 +- jolt-eval/tests/agent_test.rs | 175 ++++++++++++------ jolt-eval/tests/macro_test.rs | 2 +- 13 files changed, 347 insertions(+), 135 deletions(-) diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index 3b55c4876..d751296b0 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -11,6 +11,7 @@ tracer = { workspace = true } ark-bn254 = { workspace = true } ark-serialize = { workspace = true } serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true, features = ["std"] } postcard = { workspace = true, features = ["use-std"] } thiserror = { workspace = true } eyre = { workspace = true } diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 392bbf568..5c92be781 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -120,7 +120,10 @@ fn main() -> eyre::Result<()> { let setup = SharedSetup::new(test_case); let all_objectives = build_objectives_from_inventory(&setup, vec![]); - let all_names: Vec = 
all_objectives.iter().map(|o| o.name().to_string()).collect(); + let all_names: Vec = all_objectives + .iter() + .map(|o| o.name().to_string()) + .collect(); let filter_names: Option> = cli .objectives @@ -137,7 +140,10 @@ fn main() -> eyre::Result<()> { }; if objectives.is_empty() { - eprintln!("No matching objectives. Available: {}", all_names.join(", ")); + eprintln!( + "No matching objectives. Available: {}", + all_names.join(", ") + ); std::process::exit(1); } @@ -177,7 +183,7 @@ fn main() -> eyre::Result<()> { .filter(|a| a.invariants_passed && a.measurements .iter() - .any(|(name, &val)| { result.baseline.get(name).is_some_and(|&b| val != b) })) + .any(|(name, &val)| { result.baseline.get(name).is_some_and(|&b| val < b) })) .count(), result.attempts.len() ); @@ -188,7 +194,10 @@ fn main() -> eyre::Result<()> { Ok(()) } -fn print_measurements(directions: &HashMap, measurements: &HashMap) { +fn print_measurements( + directions: &HashMap, + measurements: &HashMap, +) { let mut names: Vec<_> = directions.keys().collect(); names.sort(); for name in names { diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index edd625713..85f5b8ae5 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -33,10 +33,6 @@ struct Cli { #[arg(long, default_value = "65536")] max_trace_length: usize, - /// Number of random fuzz inputs to run after each agent iteration - #[arg(long, default_value = "100")] - num_fuzz: usize, - /// Maximum number of Claude agentic turns per iteration #[arg(long, default_value = "30")] max_turns: usize, @@ -89,25 +85,29 @@ fn main() -> eyre::Result<()> { let config = RedTeamConfig { num_iterations: cli.iterations, - num_fuzz_per_iteration: cli.num_fuzz, }; let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); let repo_dir = std::env::current_dir()?; info!( - "Starting red team: invariant={}, iterations={}, model={}, fuzz_per_iter={}", - cli.invariant, cli.iterations, cli.model, cli.num_fuzz + "Starting red team: 
invariant={}, iterations={}, model={}", + cli.invariant, cli.iterations, cli.model ); let result = auto_redteam(invariant, &config, &agent, &repo_dir); match result { - RedTeamResult::Violation { description, error } => { + RedTeamResult::Violation { + approach, + input_json, + error, + } => { println!(); println!("==== VIOLATION FOUND ===="); - println!("Approach: {description}"); - println!("Error: {error}"); + println!("Approach: {approach}"); + println!("Input: {input_json}"); + println!("Error: {error}"); std::process::exit(1); } RedTeamResult::NoViolation { attempts } => { diff --git a/jolt-eval/src/invariant/completeness_prover.rs b/jolt-eval/src/invariant/completeness_prover.rs index 6da36839d..c9d28ae97 100644 --- a/jolt-eval/src/invariant/completeness_prover.rs +++ b/jolt-eval/src/invariant/completeness_prover.rs @@ -26,7 +26,7 @@ pub struct ProverCompletenessSetup { } /// Program inputs for prover completeness testing. -#[derive(Debug, Clone, Arbitrary)] +#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize)] pub struct ProverInputs { pub data: Vec, } diff --git a/jolt-eval/src/invariant/completeness_verifier.rs b/jolt-eval/src/invariant/completeness_verifier.rs index e9030edf0..8d05cce3f 100644 --- a/jolt-eval/src/invariant/completeness_verifier.rs +++ b/jolt-eval/src/invariant/completeness_verifier.rs @@ -28,7 +28,7 @@ pub struct VerifierCompletenessSetup { } /// Program inputs for completeness testing. -#[derive(Debug, Clone, Arbitrary)] +#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize)] pub struct ProgramInputs { pub data: Vec, } diff --git a/jolt-eval/src/invariant/determinism.rs b/jolt-eval/src/invariant/determinism.rs index 12db393ee..57ed5e3f3 100644 --- a/jolt-eval/src/invariant/determinism.rs +++ b/jolt-eval/src/invariant/determinism.rs @@ -25,7 +25,7 @@ pub struct DeterminismSetup { } /// Program inputs for determinism testing. 
-#[derive(Debug, Clone, Arbitrary)] +#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize)] pub struct DeterminismInputs { pub data: Vec, } diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index c9d04dd2e..c748cb350 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -6,12 +6,15 @@ pub mod soundness; pub mod synthesis; pub mod zk_consistency; +use std::any::Any; use std::fmt; use std::sync::Arc; use arbitrary::Arbitrary; use enumset::{EnumSet, EnumSetType}; use rand::RngCore; +use serde::de::DeserializeOwned; +use serde::Serialize; use crate::TestCase; @@ -60,10 +63,11 @@ impl InvariantViolation { /// Core invariant trait. Each invariant defines a setup phase (run once) /// and a check phase (run per input). The `Input` type must support -/// `Arbitrary` for fuzzing and random testing. +/// `Arbitrary` for fuzzing, and `Serialize`/`DeserializeOwned` so an AI +/// agent can produce counterexamples as JSON. pub trait Invariant: Send + Sync { - type Setup; - type Input: for<'a> Arbitrary<'a> + fmt::Debug + Clone; + type Setup: 'static; + type Input: for<'a> Arbitrary<'a> + fmt::Debug + Clone + Serialize + DeserializeOwned; fn name(&self) -> &str; @@ -115,7 +119,8 @@ pub struct FailedAttempt { pub failure_reason: String, } -/// Object-safe wrapper for `Invariant`, enabling heterogeneous collections. +/// Object-safe wrapper for `Invariant`, enabling heterogeneous collections +/// and JSON-based counterexample checking. pub trait DynInvariant: Send + Sync { fn name(&self) -> &str; fn description(&self) -> String; @@ -123,6 +128,30 @@ pub trait DynInvariant: Send + Sync { /// Run seed corpus checks followed by `num_random` randomly-generated inputs. fn run_checks(&self, num_random: usize) -> Vec>; + + /// Return a JSON example of the `Input` type (from the seed corpus). + fn input_json_example(&self) -> Option; + + /// Create the (type-erased) setup. Expensive — call once and reuse. 
+ fn dyn_setup(&self) -> Box; + + /// Deserialize a JSON-encoded `Input` and check it against a + /// previously-created setup (from [`dyn_setup`]). + fn check_json_input( + &self, + setup: &dyn Any, + json: &str, + ) -> CheckJsonResult; +} + +/// Outcome of [`DynInvariant::check_json_input`]. +pub enum CheckJsonResult { + /// The input was valid and the invariant held. + Pass, + /// The input was valid and the invariant was violated. + Violation(InvariantViolation), + /// The JSON could not be deserialized into the expected `Input` type. + BadInput(String), } impl DynInvariant for I { @@ -158,6 +187,35 @@ impl DynInvariant for I { results } + + fn input_json_example(&self) -> Option { + self.seed_corpus() + .into_iter() + .next() + .and_then(|input| serde_json::to_string_pretty(&input).ok()) + } + + fn dyn_setup(&self) -> Box { + Box::new(Invariant::setup(self)) + } + + fn check_json_input( + &self, + setup: &dyn Any, + json: &str, + ) -> CheckJsonResult { + let setup = setup + .downcast_ref::() + .expect("DynInvariant::check_json_input called with wrong setup type"); + let input: I::Input = match serde_json::from_str(json) { + Ok(v) => v, + Err(e) => return CheckJsonResult::BadInput(e.to_string()), + }; + match self.check(setup, input) { + Ok(()) => CheckJsonResult::Pass, + Err(v) => CheckJsonResult::Violation(v), + } + } } /// Result of running an invariant check suite. @@ -193,3 +251,50 @@ impl InvariantReport { } } } + +/// Try to extract a JSON object from free-form text. Looks for a +/// ````json` code block first, then falls back to the last `{…}` that +/// parses as valid JSON. +pub fn extract_json(text: &str) -> Option { + // 1. ```json ... ``` + if let Some(start) = text.find("```json") { + let json_start = start + "```json".len(); + if let Some(end) = text[json_start..].find("```") { + let candidate = text[json_start..json_start + end].trim(); + if serde_json::from_str::(candidate).is_ok() { + return Some(candidate.to_string()); + } + } + } + + // 2. 
Last balanced {…} that is valid JSON + let bytes = text.as_bytes(); + let mut i = bytes.len(); + while i > 0 { + i -= 1; + if bytes[i] == b'}' { + let end = i; + let mut depth: i32 = 0; + let mut j = end + 1; + while j > 0 { + j -= 1; + match bytes[j] { + b'}' => depth += 1, + b'{' => { + depth -= 1; + if depth == 0 { + let candidate = &text[j..=end]; + if serde_json::from_str::(candidate).is_ok() { + return Some(candidate.to_string()); + } + break; + } + } + _ => {} + } + } + } + } + + None +} diff --git a/jolt-eval/src/invariant/serialization_roundtrip.rs b/jolt-eval/src/invariant/serialization_roundtrip.rs index 434dfd6cc..870e7f123 100644 --- a/jolt-eval/src/invariant/serialization_roundtrip.rs +++ b/jolt-eval/src/invariant/serialization_roundtrip.rs @@ -27,7 +27,7 @@ pub struct SerializationRoundtripSetup { /// Unit input -- the roundtrip check has no variable input beyond the /// proof generated during setup. -#[derive(Debug, Clone, Arbitrary)] +#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize)] pub struct RoundtripInput { _dummy: u8, } diff --git a/jolt-eval/src/invariant/soundness.rs b/jolt-eval/src/invariant/soundness.rs index 70aa53177..c7c21c932 100644 --- a/jolt-eval/src/invariant/soundness.rs +++ b/jolt-eval/src/invariant/soundness.rs @@ -15,7 +15,7 @@ inventory::submit! { } /// Mutation applied to a serialized proof to test soundness. 
-#[derive(Debug, Clone, Arbitrary)] +#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize)] pub struct ProofMutation { pub byte_index: usize, pub new_value: u8, diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index f2caf46d2..34890d677 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -1,13 +1,17 @@ use std::path::Path; -use super::super::{DynInvariant, FailedAttempt, SynthesisTarget}; +use super::super::{extract_json, CheckJsonResult, DynInvariant, FailedAttempt, SynthesisTarget}; use super::SynthesisRegistry; use crate::agent::{truncate, AgentHarness}; /// Result of a red-team session. pub enum RedTeamResult { - /// Found a counterexample that violates the invariant. - Violation { description: String, error: String }, + /// The agent produced a counterexample that violates the invariant. + Violation { + approach: String, + input_json: String, + error: String, + }, /// All attempts failed to find a violation. NoViolation { attempts: Vec }, } @@ -15,15 +19,12 @@ pub enum RedTeamResult { /// Configuration for an AI red-team session. pub struct RedTeamConfig { pub num_iterations: usize, - /// Number of random fuzz inputs to run after each agent attempt. - pub num_fuzz_per_iteration: usize, } impl Default for RedTeamConfig { fn default() -> Self { Self { num_iterations: 10, - num_fuzz_per_iteration: 100, } } } @@ -31,11 +32,14 @@ impl Default for RedTeamConfig { /// Run an AI red-team session against a single invariant. /// /// Each iteration: -/// 1. Builds a prompt from the invariant description + past failed attempts -/// 2. Invokes the agent (via the [`AgentHarness`] trait) to analyze the code -/// 3. Runs the invariant's seed corpus + random fuzz inputs -/// 4. If a violation is found, returns immediately -/// 5. Otherwise records the failed attempt and continues +/// 1. 
Builds a prompt that includes the invariant description, a JSON +/// example of the `Input` type, and past failed attempts. +/// 2. Invokes the agent (via [`AgentHarness`]) to analyze the code and +/// produce a candidate counterexample as a JSON object. +/// 3. Extracts the JSON from the agent's response, deserializes it into +/// the invariant's `Input` type, and runs [`Invariant::check`] on it. +/// 4. If the check fails, the counterexample is genuine — return it. +/// 5. Otherwise records the failed attempt and continues. /// /// The `agent` is responsible for its own isolation (e.g. worktrees). pub fn auto_redteam( @@ -45,6 +49,8 @@ pub fn auto_redteam( repo_dir: &Path, ) -> RedTeamResult { let description = invariant.description(); + let input_example = invariant.input_json_example(); + let setup = invariant.dyn_setup(); let mut failed_attempts = Vec::new(); for iteration in 0..config.num_iterations { @@ -55,40 +61,66 @@ pub fn auto_redteam( invariant.name() ); - let prompt = build_redteam_prompt(&description, &failed_attempts); + let prompt = build_redteam_prompt(&description, input_example.as_deref(), &failed_attempts); - match agent.invoke(repo_dir, &prompt) { - Ok(response) => { - let approach = truncate(&response.text, 2000).to_string(); - tracing::info!( - "Agent response ({} chars): {}...", - approach.len(), - truncate(&approach, 200) - ); - - // Run the invariant's seed corpus + random fuzz inputs - let check_results = invariant.run_checks(config.num_fuzz_per_iteration); - let violation = check_results.iter().find(|r| r.is_err()); + let response = match agent.invoke(repo_dir, &prompt) { + Ok(r) => r, + Err(e) => { + tracing::warn!("Agent invocation failed: {e}"); + failed_attempts.push(FailedAttempt { + description: format!("Iteration {}", iteration + 1), + approach: "Agent invocation failed".to_string(), + failure_reason: e.to_string(), + }); + continue; + } + }; - if let Some(Err(e)) = violation { - return RedTeamResult::Violation { - description: 
approach, - error: e.to_string(), - }; - } + let approach = truncate(&response.text, 2000).to_string(); + tracing::info!( + "Agent response ({} chars): {}...", + approach.len(), + truncate(&approach, 200) + ); + let Some(json) = extract_json(&response.text) else { + tracing::info!("No JSON found in agent response"); + failed_attempts.push(FailedAttempt { + description: format!("Iteration {}", iteration + 1), + approach, + failure_reason: "Agent response did not contain a JSON counterexample".to_string(), + }); + continue; + }; + + tracing::info!("Extracted JSON input: {}", truncate(&json, 200)); + + match invariant.check_json_input(&*setup, &json) { + CheckJsonResult::Violation(violation) => { + tracing::info!("Counterexample CONFIRMED: {violation}"); + return RedTeamResult::Violation { + approach, + input_json: json, + error: violation.to_string(), + }; + } + CheckJsonResult::Pass => { failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), approach, - failure_reason: "Invariant check passed for all inputs".to_string(), + failure_reason: format!( + "Candidate input did not violate the invariant: {json}" + ), }); } - Err(e) => { - tracing::warn!("Agent invocation failed: {e}"); + CheckJsonResult::BadInput(parse_err) => { + tracing::info!("Agent produced unparseable input: {parse_err}"); failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), - approach: "Agent invocation failed".to_string(), - failure_reason: e.to_string(), + approach, + failure_reason: format!( + "Could not deserialize agent JSON into Input type: {parse_err}" + ), }); } } @@ -101,13 +133,14 @@ pub fn auto_redteam( fn build_redteam_prompt( invariant_description: &str, + input_example: Option<&str>, failed_attempts: &[FailedAttempt], ) -> String { let mut prompt = String::new(); prompt.push_str( "You are a security researcher red-teaming a zkVM (Jolt). 
\ - Your goal is to find an input or scenario that VIOLATES the following invariant.\n\n", + Your goal is to find a concrete input that VIOLATES the following invariant.\n\n", ); prompt.push_str("## Invariant\n\n"); @@ -120,22 +153,30 @@ fn build_redteam_prompt( invariant is enforced.\n\ 2. Look for edge cases, off-by-one errors, missing checks, or assumptions \ that could be violated.\n\ - 3. If you find a potential weakness, describe it clearly.\n\ - 4. Try to construct a concrete input or scenario that triggers the violation.\n\ - 5. Summarize your approach and findings.\n\n\ - Focus on finding REAL bugs, not theoretical concerns. The invariant will be \ - mechanically checked after your analysis, so only genuine violations count.\n\n", + 3. Construct a concrete JSON input that you believe will trigger a violation.\n\ + 4. The input will be deserialized and checked mechanically — only genuine \ + violations count.\n\n", ); + if let Some(example) = input_example { + prompt.push_str("## Input format\n\n"); + prompt.push_str( + "The counterexample must be a JSON object matching this schema. \ + Here is an example of a valid input:\n\n```json\n", + ); + prompt.push_str(example); + prompt.push_str("\n```\n\n"); + } + if !failed_attempts.is_empty() { - prompt.push_str("## Previous Failed Attempts\n\n"); + prompt.push_str("## Previous failed attempts\n\n"); prompt.push_str( - "The following approaches have already been tried and did NOT find a violation. \ - Try a fundamentally different approach.\n\n", + "The following approaches have already been tried and did NOT produce a \ + valid counterexample. 
Try a fundamentally different approach.\n\n", ); for attempt in failed_attempts { prompt.push_str(&format!( - "- **{}**: {}\n Reason for failure: {}\n", + "- **{}**: {}\n Failure: {}\n", attempt.description, attempt.approach, attempt.failure_reason )); } @@ -143,11 +184,12 @@ fn build_redteam_prompt( } prompt.push_str( - "## Output\n\n\ - End your response with a clear summary of:\n\ - - What you investigated\n\ - - What you found (if anything)\n\ - - Whether you believe the invariant holds or can be violated\n", + "## Required output\n\n\ + End your response with a JSON code block containing your candidate \ + counterexample. Use exactly this format:\n\n\ + ```json\n{ ... }\n```\n\n\ + The JSON must match the input schema above. If after thorough analysis \ + you believe no violation exists, still provide your best-effort candidate.\n", ); prompt diff --git a/jolt-eval/src/invariant/zk_consistency.rs b/jolt-eval/src/invariant/zk_consistency.rs index 61112b924..5584a6e6e 100644 --- a/jolt-eval/src/invariant/zk_consistency.rs +++ b/jolt-eval/src/invariant/zk_consistency.rs @@ -32,7 +32,7 @@ pub struct ZkConsistencySetup { verifier_preprocessing: VerifierPreprocessing, } -#[derive(Debug, Clone, Arbitrary)] +#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize)] pub struct ZkInputs { pub data: Vec, } diff --git a/jolt-eval/tests/agent_test.rs b/jolt-eval/tests/agent_test.rs index c16068a6c..267fd0c83 100644 --- a/jolt-eval/tests/agent_test.rs +++ b/jolt-eval/tests/agent_test.rs @@ -201,12 +201,12 @@ fn mock_with_diff() { #[test] fn redteam_no_violation_when_invariant_always_passes() { + // Agent produces valid JSON, but the invariant always passes let invariant = AlwaysPassInvariant; - let agent = MockAgent::always_ok("I analyzed the code and found nothing."); - let config = RedTeamConfig { - num_iterations: 3, - num_fuzz_per_iteration: 5, - }; + let agent = MockAgent::always_ok( + "I analyzed the code. 
Here is my candidate:\n```json\n42\n```", + ); + let config = RedTeamConfig { num_iterations: 3 }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -214,7 +214,7 @@ fn redteam_no_violation_when_invariant_always_passes() { RedTeamResult::NoViolation { attempts } => { assert_eq!(attempts.len(), 3); for a in &attempts { - assert_eq!(a.failure_reason, "Invariant check passed for all inputs"); + assert!(a.failure_reason.contains("did not violate")); } } RedTeamResult::Violation { .. } => { @@ -222,23 +222,25 @@ fn redteam_no_violation_when_invariant_always_passes() { } } - // Agent should have been invoked exactly 3 times assert_eq!(agent.recorded_prompts().len(), 3); } #[test] -fn redteam_finds_violation_immediately_when_invariant_always_fails() { +fn redteam_finds_violation_when_agent_produces_bad_input() { + // AlwaysFailInvariant rejects every input. Agent produces valid JSON. let invariant = AlwaysFailInvariant; - let agent = MockAgent::always_ok("Trying something."); - let config = RedTeamConfig { - num_iterations: 10, - num_fuzz_per_iteration: 0, // seed corpus alone triggers failure - }; + let agent = MockAgent::always_ok( + "I found a bug!\n```json\n99\n```", + ); + let config = RedTeamConfig { num_iterations: 10 }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); match result { - RedTeamResult::Violation { error, .. } => { + RedTeamResult::Violation { + input_json, error, .. + } => { + assert_eq!(input_json, "99"); assert!(error.contains("always fails")); } RedTeamResult::NoViolation { .. } => { @@ -246,42 +248,58 @@ fn redteam_finds_violation_immediately_when_invariant_always_fails() { } } - // Should stop after first iteration (found violation) + // Should stop after first iteration assert_eq!(agent.recorded_prompts().len(), 1); } #[test] -fn redteam_finds_violation_via_fuzz_inputs() { +fn redteam_finds_violation_with_targeted_input() { + // FailsOnZeroInvariant only fails for input 0. 
+ // Agent produces exactly 0. let invariant = FailsOnZeroInvariant; - let agent = MockAgent::always_ok("Analyzing..."); - let config = RedTeamConfig { - num_iterations: 3, - // High fuzz count makes it very likely a 0 byte appears - num_fuzz_per_iteration: 1000, - }; + let agent = MockAgent::always_ok("Try zero:\n```json\n0\n```"); + let config = RedTeamConfig { num_iterations: 5 }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); - // With 1000 random u8 inputs per iteration, the chance of never hitting 0 - // across 3 iterations is (255/256)^3000 ≈ 0. So we expect a violation. match result { - RedTeamResult::Violation { error, .. } => { + RedTeamResult::Violation { + input_json, error, .. + } => { + assert_eq!(input_json, "0"); assert!(error.contains("zero")); } RedTeamResult::NoViolation { .. } => { - panic!("Expected violation for FailsOnZeroInvariant with high fuzz count"); + panic!("Expected violation for FailsOnZeroInvariant with input 0"); } } } +#[test] +fn redteam_no_violation_when_agent_misses() { + // FailsOnZeroInvariant only fails for 0, but agent guesses 1. 
+ let invariant = FailsOnZeroInvariant; + let agent = MockAgent::always_ok("Trying 1:\n```json\n1\n```"); + let config = RedTeamConfig { num_iterations: 2 }; + + let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + match result { + RedTeamResult::NoViolation { attempts } => { + assert_eq!(attempts.len(), 2); + for a in &attempts { + assert!(a.failure_reason.contains("did not violate")); + } + } + _ => panic!("Expected NoViolation since agent never guesses 0"), + } +} + #[test] fn redteam_handles_agent_errors_gracefully() { let invariant = AlwaysPassInvariant; let agent = MockAgent::always_err("network timeout"); - let config = RedTeamConfig { - num_iterations: 3, - num_fuzz_per_iteration: 0, - }; + let config = RedTeamConfig { num_iterations: 3 }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -299,14 +317,51 @@ fn redteam_handles_agent_errors_gracefully() { } } +#[test] +fn redteam_handles_no_json_in_response() { + let invariant = AlwaysPassInvariant; + let agent = MockAgent::always_ok("I looked around but have no candidate to offer."); + let config = RedTeamConfig { num_iterations: 1 }; + + let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + match result { + RedTeamResult::NoViolation { attempts } => { + assert_eq!(attempts.len(), 1); + assert!(attempts[0].failure_reason.contains("did not contain a JSON")); + } + _ => panic!("Expected NoViolation"), + } +} + +#[test] +fn redteam_handles_invalid_json_schema() { + // Agent produces JSON, but it doesn't match the Input type (u8) + let invariant = AlwaysPassInvariant; + let agent = MockAgent::always_ok( + "Here:\n```json\n{\"not_a_u8\": true}\n```", + ); + let config = RedTeamConfig { num_iterations: 1 }; + + let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + // A deserialization failure is NOT a real violation — it's a BadInput + match result { + RedTeamResult::NoViolation { attempts } => { + 
assert_eq!(attempts.len(), 1); + assert!(attempts[0].failure_reason.contains("Could not deserialize")); + } + RedTeamResult::Violation { .. } => { + panic!("Parse error should not be treated as a violation"); + } + } +} + #[test] fn redteam_prompt_includes_invariant_description() { let invariant = AlwaysPassInvariant; - let agent = MockAgent::always_ok("ok"); - let config = RedTeamConfig { - num_iterations: 1, - num_fuzz_per_iteration: 0, - }; + let agent = MockAgent::always_ok("```json\n0\n```"); + let config = RedTeamConfig { num_iterations: 1 }; auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -317,27 +372,32 @@ fn redteam_prompt_includes_invariant_description() { } #[test] -fn redteam_prompt_includes_failed_attempts_after_first_iteration() { +fn redteam_prompt_includes_input_example() { let invariant = AlwaysPassInvariant; - let agent = MockAgent::always_ok("I tried X but it didn't work."); - let config = RedTeamConfig { - num_iterations: 3, - num_fuzz_per_iteration: 0, - }; + let agent = MockAgent::always_ok("```json\n0\n```"); + let config = RedTeamConfig { num_iterations: 1 }; auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); let prompts = agent.recorded_prompts(); - assert_eq!(prompts.len(), 3); + // Prompt should include a JSON example from the seed corpus + assert!(prompts[0].contains("Input format")); + assert!(prompts[0].contains("```json")); +} + +#[test] +fn redteam_prompt_includes_failed_attempts_after_first_iteration() { + let invariant = AlwaysPassInvariant; + let agent = MockAgent::always_ok("Analysis.\n```json\n42\n```"); + let config = RedTeamConfig { num_iterations: 3 }; - // First prompt should NOT contain "Previous Failed Attempts" - assert!(!prompts[0].contains("Previous Failed Attempts")); + auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); - // Second prompt should contain the first attempt's approach - assert!(prompts[1].contains("Previous Failed Attempts")); - assert!(prompts[1].contains("I tried X 
but it didn't work.")); + let prompts = agent.recorded_prompts(); + assert_eq!(prompts.len(), 3); - // Third prompt should contain both prior attempts + assert!(!prompts[0].contains("Previous failed attempts")); + assert!(prompts[1].contains("Previous failed attempts")); assert!(prompts[2].contains("Iteration 1")); assert!(prompts[2].contains("Iteration 2")); } @@ -346,10 +406,7 @@ fn redteam_prompt_includes_failed_attempts_after_first_iteration() { fn redteam_zero_iterations_returns_immediately() { let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok("should not be called"); - let config = RedTeamConfig { - num_iterations: 0, - num_fuzz_per_iteration: 0, - }; + let config = RedTeamConfig { num_iterations: 0 }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -368,19 +425,16 @@ fn redteam_mixed_agent_responses() { let invariant = AlwaysPassInvariant; let agent = MockAgent::from_responses(vec![ Ok(AgentResponse { - text: "first try".into(), + text: "first try\n```json\n1\n```".into(), diff: None, }), Err(AgentError::new("transient error")), Ok(AgentResponse { - text: "third try".into(), + text: "third try\n```json\n3\n```".into(), diff: None, }), ]); - let config = RedTeamConfig { - num_iterations: 3, - num_fuzz_per_iteration: 0, - }; + let config = RedTeamConfig { num_iterations: 3 }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -464,14 +518,15 @@ fn custom_harness_plugs_into_auto_redteam() { let harness = FirstSuccessHarness { agents: vec![ Box::new(MockAgent::always_err("agent 1 down")), - Box::new(MockAgent::always_ok("agent 2 found nothing")), + Box::new(MockAgent::always_ok( + "agent 2 found nothing\n```json\n7\n```", + )), ], }; let invariant = AlwaysPassInvariant; let config = RedTeamConfig { num_iterations: 2, - num_fuzz_per_iteration: 0, }; let result = auto_redteam(&invariant, &config, &harness, Path::new("/tmp")); diff --git a/jolt-eval/tests/macro_test.rs 
b/jolt-eval/tests/macro_test.rs index 66e67d384..0439ed89c 100644 --- a/jolt-eval/tests/macro_test.rs +++ b/jolt-eval/tests/macro_test.rs @@ -35,7 +35,7 @@ impl Invariant for AlwaysPassInvariant { // BoundsCheck: Test + Fuzz only, uses a struct Input type // --------------------------------------------------------------------------- -#[derive(Debug, Clone, jolt_eval::arbitrary::Arbitrary)] +#[derive(Debug, Clone, jolt_eval::arbitrary::Arbitrary, serde::Serialize, serde::Deserialize)] pub struct RangeInput { pub lo: u32, pub hi: u32, From 9b52b86cf2acb23b94312c30dd78a20f418d73ab Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Wed, 1 Apr 2026 10:43:28 -0400 Subject: [PATCH 12/86] cleanup(jolt-eval): remove truncation of agent responses in auto_redteam The full text belongs in FailedAttempt.approach so callers can decide how to display it. Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/invariant/synthesis/redteam.rs | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index 34890d677..1c400ea11 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -2,7 +2,7 @@ use std::path::Path; use super::super::{extract_json, CheckJsonResult, DynInvariant, FailedAttempt, SynthesisTarget}; use super::SynthesisRegistry; -use crate::agent::{truncate, AgentHarness}; +use crate::agent::AgentHarness; /// Result of a red-team session. 
pub enum RedTeamResult { @@ -23,9 +23,7 @@ pub struct RedTeamConfig { impl Default for RedTeamConfig { fn default() -> Self { - Self { - num_iterations: 10, - } + Self { num_iterations: 10 } } } @@ -76,14 +74,8 @@ pub fn auto_redteam( } }; - let approach = truncate(&response.text, 2000).to_string(); - tracing::info!( - "Agent response ({} chars): {}...", - approach.len(), - truncate(&approach, 200) - ); - - let Some(json) = extract_json(&response.text) else { + let approach = response.text; + let Some(json) = extract_json(&approach) else { tracing::info!("No JSON found in agent response"); failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), @@ -93,8 +85,6 @@ pub fn auto_redteam( continue; }; - tracing::info!("Extracted JSON input: {}", truncate(&json, 200)); - match invariant.check_json_input(&*setup, &json) { CheckJsonResult::Violation(violation) => { tracing::info!("Counterexample CONFIRMED: {violation}"); From 3f00b1e445eee74a4568268fc086ba1264ddd507 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Wed, 1 Apr 2026 10:52:49 -0400 Subject: [PATCH 13/86] test(jolt-eval): exercise measure_dyn with non-empty objective list Give ConstantObjective a configurable name and direction so multiple instances don't collide in the HashMap. test_measure_objectives now passes two objectives and asserts both values appear in the result. Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/tests/integration.rs | 40 ++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/jolt-eval/tests/integration.rs b/jolt-eval/tests/integration.rs index 0f7c4d431..bd29c2b67 100644 --- a/jolt-eval/tests/integration.rs +++ b/jolt-eval/tests/integration.rs @@ -64,12 +64,14 @@ impl jolt_eval::Invariant for FailingInvariant { /// A trivial objective for testing the framework. 
struct ConstantObjective { + label: &'static str, value: f64, + direction: Direction, } impl AbstractObjective for ConstantObjective { fn name(&self) -> &str { - "constant" + self.label } fn collect_measurement(&self) -> Result { @@ -77,7 +79,7 @@ impl AbstractObjective for ConstantObjective { } fn direction(&self) -> Direction { - Direction::Minimize + self.direction } } @@ -122,19 +124,35 @@ fn test_synthesis_registry() { #[test] fn test_constant_objective() { - let obj = ConstantObjective { value: 42.0 }; - assert_eq!(obj.name(), "constant"); + let obj = ConstantObjective { + label: "latency", + value: 42.0, + direction: Direction::Minimize, + }; + assert_eq!(obj.name(), "latency"); assert_eq!(obj.collect_measurement().unwrap(), 42.0); assert_eq!(obj.direction(), Direction::Minimize); } #[test] fn test_measure_objectives() { - use jolt_eval::objective::measure_objectives; - - // measure_objectives takes &[Objective], which uses the enum. - // For unit testing we just verify the function signature works - // with an empty slice. 
- let results = measure_objectives(&[]); - assert!(results.is_empty()); + use jolt_eval::objective::measure_dyn; + + let objectives: Vec> = vec![ + Box::new(ConstantObjective { + label: "prover_time", + value: 3.14, + direction: Direction::Minimize, + }), + Box::new(ConstantObjective { + label: "inline_count", + value: 256.0, + direction: Direction::Maximize, + }), + ]; + + let results = measure_dyn(&objectives); + assert_eq!(results.len(), 2); + assert_eq!(results["prover_time"], 3.14); + assert_eq!(results["inline_count"], 256.0); } From 7fe3d4977016792b917e728d01a8a6b2f47a799e Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Wed, 1 Apr 2026 11:04:12 -0400 Subject: [PATCH 14/86] feat(jolt-eval): use --output-format json --json-schema for structured red-team output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ClaudeCodeAgent::invoke_structured now passes --output-format json and --json-schema to the Claude CLI, so the response is guaranteed to conform to the schema. The CLI envelope's `structured_output` field is extracted directly — no fragile regex/extract_json parsing needed for agents that support it. Key changes: - AgentHarness gains invoke_structured(repo_dir, prompt, schema) with a default that falls back to invoke(). - ClaudeCodeAgent overrides invoke_structured: passes the schema via --json-schema, parses the CLI JSON envelope, extracts structured_output (or result as fallback). - Invariant::Input gains a JsonSchema bound (schemars 0.8); all 6 Input types derive JsonSchema. - DynInvariant gains input_json_schema() → serde_json::Value, backed by schemars::schema_for! in the blanket impl. - auto_redteam builds an envelope schema {"analysis": string, "counterexample": } and calls invoke_structured. Response is parsed as the envelope first; if that fails (agent doesn't support structured output), falls back to extract_json on free-form text. 
- Prompt updated: tells the agent to respond with {analysis, counterexample} JSON instead of a code block. Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/Cargo.toml | 1 + jolt-eval/src/agent.rs | 162 +++++++++++++----- .../src/invariant/completeness_prover.rs | 2 +- .../src/invariant/completeness_verifier.rs | 2 +- jolt-eval/src/invariant/determinism.rs | 2 +- jolt-eval/src/invariant/mod.rs | 11 +- .../src/invariant/serialization_roundtrip.rs | 2 +- jolt-eval/src/invariant/soundness.rs | 2 +- jolt-eval/src/invariant/synthesis/redteam.rs | 106 ++++++++---- jolt-eval/src/invariant/zk_consistency.rs | 2 +- jolt-eval/tests/agent_test.rs | 71 ++++---- jolt-eval/tests/integration.rs | 4 +- jolt-eval/tests/macro_test.rs | 4 +- 13 files changed, 259 insertions(+), 112 deletions(-) diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index d751296b0..4dd9779d7 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -24,6 +24,7 @@ tracing-subscriber = { workspace = true } arbitrary = { version = "1", features = ["derive"] } enumset = "1" +schemars = "0.8" inventory = { workspace = true } tempfile = "3" diff --git a/jolt-eval/src/agent.rs b/jolt-eval/src/agent.rs index f2b563498..e7a65304f 100644 --- a/jolt-eval/src/agent.rs +++ b/jolt-eval/src/agent.rs @@ -47,6 +47,23 @@ impl AgentError { /// service without any local isolation. pub trait AgentHarness: Send + Sync { fn invoke(&self, repo_dir: &Path, prompt: &str) -> Result; + + /// Invoke the agent with a JSON Schema constraint on the response. + /// + /// Agents that support structured output (e.g. Claude Code with + /// `--output-format json --json-schema`) should override this to + /// guarantee the response conforms to `schema`. The returned + /// [`AgentResponse::text`] must be the validated JSON string. + /// + /// The default falls back to [`invoke`](Self::invoke). 
+ fn invoke_structured( + &self, + repo_dir: &Path, + prompt: &str, + _schema: &serde_json::Value, + ) -> Result { + self.invoke(repo_dir, prompt) + } } /// Agent implementation that invokes the Claude Code CLI in an isolated @@ -65,30 +82,48 @@ impl ClaudeCodeAgent { } } -impl AgentHarness for ClaudeCodeAgent { - fn invoke(&self, repo_dir: &Path, prompt: &str) -> Result { - // 1. Create worktree - let worktree_dir = create_worktree(repo_dir)?; - tracing::info!("Created worktree at {}", worktree_dir.display()); - - // 2. Run Claude +impl ClaudeCodeAgent { + fn run_cli( + &self, + worktree_dir: &Path, + prompt: &str, + extra_args: &[&str], + ) -> Result { tracing::info!( "Invoking claude (model={}, max_turns={})...", self.model, self.max_turns ); - let result = Command::new("claude") - .current_dir(&worktree_dir) + let mut cmd = Command::new("claude"); + cmd.current_dir(worktree_dir) .arg("-p") .arg(prompt) .arg("--model") .arg(&self.model) .arg("--max-turns") .arg(self.max_turns.to_string()) - .arg("--verbose") - .output(); + .arg("--verbose"); + for arg in extra_args { + cmd.arg(arg); + } + cmd.output().map_err(|e| { + AgentError::new(format!( + "Failed to invoke claude: {e}. \ + Make sure the `claude` CLI is installed and on your PATH. \ + Install via: npm install -g @anthropic-ai/claude-code" + )) + }) + } +} + +impl AgentHarness for ClaudeCodeAgent { + fn invoke(&self, repo_dir: &Path, prompt: &str) -> Result { + let worktree_dir = create_worktree(repo_dir)?; + tracing::info!("Created worktree at {}", worktree_dir.display()); + + let result = self.run_cli(&worktree_dir, prompt, &[]); - // 3. Capture diff before cleanup + // Capture diff before cleanup let diff = Command::new("git") .current_dir(&worktree_dir) .args(["diff", "HEAD"]) @@ -103,42 +138,89 @@ impl AgentHarness for ClaudeCodeAgent { } }); - // 4. 
Clean up worktree tracing::info!("Cleaning up worktree..."); remove_worktree(repo_dir, &worktree_dir); let _ = std::fs::remove_dir_all(&worktree_dir); - // 5. Parse result - match result { - Ok(output) => { - let stdout = String::from_utf8_lossy(&output.stdout); - let stderr = String::from_utf8_lossy(&output.stderr); - - if !output.status.success() { - tracing::warn!("claude exited with status {}", output.status); - if !stderr.is_empty() { - tracing::warn!("stderr: {}", truncate(&stderr, 500)); - } - } + let output = result?; + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); - let text = if stdout.trim().is_empty() { - stderr.to_string() - } else { - stdout.to_string() - }; + if !output.status.success() { + tracing::warn!("claude exited with status {}", output.status); + if !stderr.is_empty() { + tracing::warn!("stderr: {}", truncate(&stderr, 500)); + } + } - if text.trim().is_empty() && diff.is_none() { - return Err(AgentError::new("Agent produced no output")); - } + let text = if stdout.trim().is_empty() { + stderr.to_string() + } else { + stdout.to_string() + }; - Ok(AgentResponse { text, diff }) - } - Err(e) => Err(AgentError::new(format!( - "Failed to invoke claude: {e}. \ - Make sure the `claude` CLI is installed and on your PATH. 
\ - Install via: npm install -g @anthropic-ai/claude-code" - ))), + if text.trim().is_empty() && diff.is_none() { + return Err(AgentError::new("Agent produced no output")); + } + + Ok(AgentResponse { text, diff }) + } + + fn invoke_structured( + &self, + repo_dir: &Path, + prompt: &str, + schema: &serde_json::Value, + ) -> Result { + let worktree_dir = create_worktree(repo_dir)?; + tracing::info!("Created worktree at {}", worktree_dir.display()); + + let schema_str = serde_json::to_string(schema) + .map_err(|e| AgentError::new(format!("schema serialization: {e}")))?; + + let result = self.run_cli( + &worktree_dir, + prompt, + &["--output-format", "json", "--json-schema", &schema_str], + ); + + tracing::info!("Cleaning up worktree..."); + remove_worktree(repo_dir, &worktree_dir); + let _ = std::fs::remove_dir_all(&worktree_dir); + + let output = result?; + let stdout = String::from_utf8_lossy(&output.stdout); + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(AgentError::new(format!( + "claude exited with status {}: {}", + output.status, + truncate(&stderr, 500) + ))); } + + // Parse the CLI JSON envelope and extract structured_output + let envelope: serde_json::Value = serde_json::from_str(&stdout).map_err(|e| { + AgentError::new(format!("failed to parse CLI JSON envelope: {e}")) + })?; + + let text = if let Some(structured) = envelope.get("structured_output") { + serde_json::to_string(structured) + .map_err(|e| AgentError::new(format!("re-serialize structured_output: {e}")))? 
+ } else if let Some(result) = envelope.get("result") { + match result { + serde_json::Value::String(s) => s.clone(), + other => serde_json::to_string(other) + .map_err(|e| AgentError::new(format!("re-serialize result: {e}")))?, + } + } else { + return Err(AgentError::new( + "CLI JSON envelope contained neither structured_output nor result", + )); + }; + + Ok(AgentResponse { text, diff: None }) } } diff --git a/jolt-eval/src/invariant/completeness_prover.rs b/jolt-eval/src/invariant/completeness_prover.rs index c9d28ae97..16d14627d 100644 --- a/jolt-eval/src/invariant/completeness_prover.rs +++ b/jolt-eval/src/invariant/completeness_prover.rs @@ -26,7 +26,7 @@ pub struct ProverCompletenessSetup { } /// Program inputs for prover completeness testing. -#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] pub struct ProverInputs { pub data: Vec, } diff --git a/jolt-eval/src/invariant/completeness_verifier.rs b/jolt-eval/src/invariant/completeness_verifier.rs index 8d05cce3f..07d2728db 100644 --- a/jolt-eval/src/invariant/completeness_verifier.rs +++ b/jolt-eval/src/invariant/completeness_verifier.rs @@ -28,7 +28,7 @@ pub struct VerifierCompletenessSetup { } /// Program inputs for completeness testing. -#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] pub struct ProgramInputs { pub data: Vec, } diff --git a/jolt-eval/src/invariant/determinism.rs b/jolt-eval/src/invariant/determinism.rs index 57ed5e3f3..9e53f61af 100644 --- a/jolt-eval/src/invariant/determinism.rs +++ b/jolt-eval/src/invariant/determinism.rs @@ -25,7 +25,7 @@ pub struct DeterminismSetup { } /// Program inputs for determinism testing. 
-#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] pub struct DeterminismInputs { pub data: Vec, } diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index c748cb350..583633300 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -13,6 +13,7 @@ use std::sync::Arc; use arbitrary::Arbitrary; use enumset::{EnumSet, EnumSetType}; use rand::RngCore; +use schemars::JsonSchema; use serde::de::DeserializeOwned; use serde::Serialize; @@ -67,7 +68,7 @@ impl InvariantViolation { /// agent can produce counterexamples as JSON. pub trait Invariant: Send + Sync { type Setup: 'static; - type Input: for<'a> Arbitrary<'a> + fmt::Debug + Clone + Serialize + DeserializeOwned; + type Input: for<'a> Arbitrary<'a> + fmt::Debug + Clone + Serialize + DeserializeOwned + JsonSchema; fn name(&self) -> &str; @@ -132,6 +133,9 @@ pub trait DynInvariant: Send + Sync { /// Return a JSON example of the `Input` type (from the seed corpus). fn input_json_example(&self) -> Option; + /// Return the JSON Schema for the `Input` type. + fn input_json_schema(&self) -> serde_json::Value; + /// Create the (type-erased) setup. Expensive — call once and reuse. 
fn dyn_setup(&self) -> Box; @@ -195,6 +199,11 @@ impl DynInvariant for I { .and_then(|input| serde_json::to_string_pretty(&input).ok()) } + fn input_json_schema(&self) -> serde_json::Value { + let schema = schemars::schema_for!(I::Input); + serde_json::to_value(schema).unwrap() + } + fn dyn_setup(&self) -> Box { Box::new(Invariant::setup(self)) } diff --git a/jolt-eval/src/invariant/serialization_roundtrip.rs b/jolt-eval/src/invariant/serialization_roundtrip.rs index 870e7f123..8c235e24f 100644 --- a/jolt-eval/src/invariant/serialization_roundtrip.rs +++ b/jolt-eval/src/invariant/serialization_roundtrip.rs @@ -27,7 +27,7 @@ pub struct SerializationRoundtripSetup { /// Unit input -- the roundtrip check has no variable input beyond the /// proof generated during setup. -#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] pub struct RoundtripInput { _dummy: u8, } diff --git a/jolt-eval/src/invariant/soundness.rs b/jolt-eval/src/invariant/soundness.rs index c7c21c932..e18a7ffc0 100644 --- a/jolt-eval/src/invariant/soundness.rs +++ b/jolt-eval/src/invariant/soundness.rs @@ -15,7 +15,7 @@ inventory::submit! { } /// Mutation applied to a serialized proof to test soundness. 
-#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] pub struct ProofMutation { pub byte_index: usize, pub new_value: u8, diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index 1c400ea11..0f90b3b65 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -1,6 +1,6 @@ use std::path::Path; -use super::super::{extract_json, CheckJsonResult, DynInvariant, FailedAttempt, SynthesisTarget}; +use super::super::{CheckJsonResult, DynInvariant, FailedAttempt, SynthesisTarget}; use super::SynthesisRegistry; use crate::agent::AgentHarness; @@ -30,16 +30,17 @@ impl Default for RedTeamConfig { /// Run an AI red-team session against a single invariant. /// /// Each iteration: -/// 1. Builds a prompt that includes the invariant description, a JSON -/// example of the `Input` type, and past failed attempts. -/// 2. Invokes the agent (via [`AgentHarness`]) to analyze the code and -/// produce a candidate counterexample as a JSON object. -/// 3. Extracts the JSON from the agent's response, deserializes it into -/// the invariant's `Input` type, and runs [`Invariant::check`] on it. +/// 1. Builds a prompt with the invariant description, a JSON example of +/// the `Input` type, and past failed attempts. +/// 2. Derives a JSON Schema for the response envelope (an object with +/// `analysis` and `counterexample` fields) and invokes the agent via +/// [`AgentHarness::invoke_structured`]. Agents that support structured +/// output (e.g. `ClaudeCodeAgent` with `--json-schema`) will guarantee +/// the response conforms; others fall back to free-form text. +/// 3. Parses the `counterexample` from the response, deserializes it into +/// the invariant's `Input` type, and runs `Invariant::check`. /// 4. If the check fails, the counterexample is genuine — return it. /// 5. 
Otherwise records the failed attempt and continues. -/// -/// The `agent` is responsible for its own isolation (e.g. worktrees). pub fn auto_redteam( invariant: &dyn DynInvariant, config: &RedTeamConfig, @@ -48,6 +49,8 @@ pub fn auto_redteam( ) -> RedTeamResult { let description = invariant.description(); let input_example = invariant.input_json_example(); + let input_schema = invariant.input_json_schema(); + let envelope_schema = build_envelope_schema(&input_schema); let setup = invariant.dyn_setup(); let mut failed_attempts = Vec::new(); @@ -59,9 +62,10 @@ pub fn auto_redteam( invariant.name() ); - let prompt = build_redteam_prompt(&description, input_example.as_deref(), &failed_attempts); + let prompt = + build_redteam_prompt(&description, input_example.as_deref(), &failed_attempts); - let response = match agent.invoke(repo_dir, &prompt) { + let response = match agent.invoke_structured(repo_dir, &prompt, &envelope_schema) { Ok(r) => r, Err(e) => { tracing::warn!("Agent invocation failed: {e}"); @@ -74,32 +78,44 @@ pub fn auto_redteam( } }; - let approach = response.text; - let Some(json) = extract_json(&approach) else { - tracing::info!("No JSON found in agent response"); - failed_attempts.push(FailedAttempt { - description: format!("Iteration {}", iteration + 1), - approach, - failure_reason: "Agent response did not contain a JSON counterexample".to_string(), - }); - continue; + // Parse the structured response envelope. + // Agents using --json-schema return validated JSON directly. + // The fallback path (default invoke_structured) returns free-form + // text, so we try structured parsing first, then extract_json. 
+ let (analysis, counterexample_json) = match parse_envelope(&response.text) { + Some(pair) => pair, + None => { + // Fallback: try to find raw JSON in free-form text + match super::super::extract_json(&response.text) { + Some(json) => (response.text.clone(), json), + None => { + failed_attempts.push(FailedAttempt { + description: format!("Iteration {}", iteration + 1), + approach: response.text, + failure_reason: + "Agent response did not contain a JSON counterexample".to_string(), + }); + continue; + } + } + } }; - match invariant.check_json_input(&*setup, &json) { + match invariant.check_json_input(&*setup, &counterexample_json) { CheckJsonResult::Violation(violation) => { tracing::info!("Counterexample CONFIRMED: {violation}"); return RedTeamResult::Violation { - approach, - input_json: json, + approach: analysis, + input_json: counterexample_json, error: violation.to_string(), }; } CheckJsonResult::Pass => { failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), - approach, + approach: analysis, failure_reason: format!( - "Candidate input did not violate the invariant: {json}" + "Candidate input did not violate the invariant: {counterexample_json}" ), }); } @@ -107,7 +123,7 @@ pub fn auto_redteam( tracing::info!("Agent produced unparseable input: {parse_err}"); failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), - approach, + approach: analysis, failure_reason: format!( "Could not deserialize agent JSON into Input type: {parse_err}" ), @@ -121,6 +137,31 @@ pub fn auto_redteam( } } +/// Build the JSON Schema for the structured response envelope. +/// The agent's response must be `{"analysis": "", "counterexample": }`. 
+fn build_envelope_schema(input_schema: &serde_json::Value) -> serde_json::Value { + serde_json::json!({ + "type": "object", + "properties": { + "analysis": { + "type": "string", + "description": "Your analysis of the invariant and approach to finding a violation" + }, + "counterexample": input_schema + }, + "required": ["analysis", "counterexample"] + }) +} + +/// Try to parse the response as a structured `{"analysis", "counterexample"}` envelope. +/// Returns `(analysis, counterexample_json)` on success. +fn parse_envelope(text: &str) -> Option<(String, String)> { + let val: serde_json::Value = serde_json::from_str(text).ok()?; + let analysis = val.get("analysis")?.as_str()?.to_string(); + let counterexample = val.get("counterexample")?; + Some((analysis, serde_json::to_string(counterexample).ok()?)) +} + fn build_redteam_prompt( invariant_description: &str, input_example: Option<&str>, @@ -143,7 +184,8 @@ fn build_redteam_prompt( invariant is enforced.\n\ 2. Look for edge cases, off-by-one errors, missing checks, or assumptions \ that could be violated.\n\ - 3. Construct a concrete JSON input that you believe will trigger a violation.\n\ + 3. Construct a concrete counterexample input that you believe will trigger \ + a violation.\n\ 4. The input will be deserialized and checked mechanically — only genuine \ violations count.\n\n", ); @@ -151,7 +193,7 @@ fn build_redteam_prompt( if let Some(example) = input_example { prompt.push_str("## Input format\n\n"); prompt.push_str( - "The counterexample must be a JSON object matching this schema. \ + "The counterexample must be a JSON value matching the schema. \ Here is an example of a valid input:\n\n```json\n", ); prompt.push_str(example); @@ -175,11 +217,9 @@ fn build_redteam_prompt( prompt.push_str( "## Required output\n\n\ - End your response with a JSON code block containing your candidate \ - counterexample. Use exactly this format:\n\n\ - ```json\n{ ... }\n```\n\n\ - The JSON must match the input schema above. 
If after thorough analysis \ - you believe no violation exists, still provide your best-effort candidate.\n", + Respond with a JSON object containing:\n\ + - `analysis`: your reasoning and what you investigated\n\ + - `counterexample`: the candidate input matching the schema above\n", ); prompt diff --git a/jolt-eval/src/invariant/zk_consistency.rs b/jolt-eval/src/invariant/zk_consistency.rs index 5584a6e6e..f818273ee 100644 --- a/jolt-eval/src/invariant/zk_consistency.rs +++ b/jolt-eval/src/invariant/zk_consistency.rs @@ -32,7 +32,7 @@ pub struct ZkConsistencySetup { verifier_preprocessing: VerifierPreprocessing, } -#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] pub struct ZkInputs { pub data: Vec, } diff --git a/jolt-eval/tests/agent_test.rs b/jolt-eval/tests/agent_test.rs index 267fd0c83..6a4595a87 100644 --- a/jolt-eval/tests/agent_test.rs +++ b/jolt-eval/tests/agent_test.rs @@ -199,13 +199,19 @@ fn mock_with_diff() { // auto_redteam tests with MockAgent // ========================================================================= +/// Helper: build a structured envelope response string. +fn envelope(analysis: &str, counterexample: impl serde::Serialize) -> String { + serde_json::json!({ + "analysis": analysis, + "counterexample": counterexample, + }) + .to_string() +} + #[test] fn redteam_no_violation_when_invariant_always_passes() { - // Agent produces valid JSON, but the invariant always passes let invariant = AlwaysPassInvariant; - let agent = MockAgent::always_ok( - "I analyzed the code. 
Here is my candidate:\n```json\n42\n```", - ); + let agent = MockAgent::always_ok(&envelope("I analyzed the code.", 42)); let config = RedTeamConfig { num_iterations: 3 }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -226,12 +232,10 @@ fn redteam_no_violation_when_invariant_always_passes() { } #[test] -fn redteam_finds_violation_when_agent_produces_bad_input() { - // AlwaysFailInvariant rejects every input. Agent produces valid JSON. +fn redteam_finds_violation_with_structured_response() { + // AlwaysFailInvariant rejects every input. let invariant = AlwaysFailInvariant; - let agent = MockAgent::always_ok( - "I found a bug!\n```json\n99\n```", - ); + let agent = MockAgent::always_ok(&envelope("I found a bug!", 99)); let config = RedTeamConfig { num_iterations: 10 }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -248,16 +252,14 @@ fn redteam_finds_violation_when_agent_produces_bad_input() { } } - // Should stop after first iteration assert_eq!(agent.recorded_prompts().len(), 1); } #[test] fn redteam_finds_violation_with_targeted_input() { // FailsOnZeroInvariant only fails for input 0. - // Agent produces exactly 0. let invariant = FailsOnZeroInvariant; - let agent = MockAgent::always_ok("Try zero:\n```json\n0\n```"); + let agent = MockAgent::always_ok(&envelope("Try zero", 0)); let config = RedTeamConfig { num_iterations: 5 }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -277,9 +279,8 @@ fn redteam_finds_violation_with_targeted_input() { #[test] fn redteam_no_violation_when_agent_misses() { - // FailsOnZeroInvariant only fails for 0, but agent guesses 1. 
let invariant = FailsOnZeroInvariant; - let agent = MockAgent::always_ok("Trying 1:\n```json\n1\n```"); + let agent = MockAgent::always_ok(&envelope("Trying 1", 1)); let config = RedTeamConfig { num_iterations: 2 }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -319,6 +320,7 @@ fn redteam_handles_agent_errors_gracefully() { #[test] fn redteam_handles_no_json_in_response() { + // Agent returns plain text (no envelope, no code block) let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok("I looked around but have no candidate to offer."); let config = RedTeamConfig { num_iterations: 1 }; @@ -335,17 +337,14 @@ fn redteam_handles_no_json_in_response() { } #[test] -fn redteam_handles_invalid_json_schema() { - // Agent produces JSON, but it doesn't match the Input type (u8) +fn redteam_handles_invalid_counterexample_type() { + // Structured envelope with wrong counterexample type for Input=u8 let invariant = AlwaysPassInvariant; - let agent = MockAgent::always_ok( - "Here:\n```json\n{\"not_a_u8\": true}\n```", - ); + let agent = MockAgent::always_ok(&envelope("Here", "not_a_number")); let config = RedTeamConfig { num_iterations: 1 }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); - // A deserialization failure is NOT a real violation — it's a BadInput match result { RedTeamResult::NoViolation { attempts } => { assert_eq!(attempts.len(), 1); @@ -357,10 +356,27 @@ fn redteam_handles_invalid_json_schema() { } } +#[test] +fn redteam_fallback_extracts_json_from_freeform_text() { + // Agent doesn't return structured envelope, but has a code block + let invariant = AlwaysFailInvariant; + let agent = MockAgent::always_ok("Found it!\n```json\n77\n```"); + let config = RedTeamConfig { num_iterations: 1 }; + + let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + match result { + RedTeamResult::Violation { input_json, .. 
} => { + assert_eq!(input_json, "77"); + } + _ => panic!("Expected violation via extract_json fallback"), + } +} + #[test] fn redteam_prompt_includes_invariant_description() { let invariant = AlwaysPassInvariant; - let agent = MockAgent::always_ok("```json\n0\n```"); + let agent = MockAgent::always_ok(&envelope("ok", 0)); let config = RedTeamConfig { num_iterations: 1 }; auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -374,13 +390,12 @@ fn redteam_prompt_includes_invariant_description() { #[test] fn redteam_prompt_includes_input_example() { let invariant = AlwaysPassInvariant; - let agent = MockAgent::always_ok("```json\n0\n```"); + let agent = MockAgent::always_ok(&envelope("ok", 0)); let config = RedTeamConfig { num_iterations: 1 }; auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); let prompts = agent.recorded_prompts(); - // Prompt should include a JSON example from the seed corpus assert!(prompts[0].contains("Input format")); assert!(prompts[0].contains("```json")); } @@ -388,7 +403,7 @@ fn redteam_prompt_includes_input_example() { #[test] fn redteam_prompt_includes_failed_attempts_after_first_iteration() { let invariant = AlwaysPassInvariant; - let agent = MockAgent::always_ok("Analysis.\n```json\n42\n```"); + let agent = MockAgent::always_ok(&envelope("Tried something", 42)); let config = RedTeamConfig { num_iterations: 3 }; auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -425,12 +440,12 @@ fn redteam_mixed_agent_responses() { let invariant = AlwaysPassInvariant; let agent = MockAgent::from_responses(vec![ Ok(AgentResponse { - text: "first try\n```json\n1\n```".into(), + text: envelope("first try", 1), diff: None, }), Err(AgentError::new("transient error")), Ok(AgentResponse { - text: "third try\n```json\n3\n```".into(), + text: envelope("third try", 3), diff: None, }), ]); @@ -518,9 +533,7 @@ fn custom_harness_plugs_into_auto_redteam() { let harness = FirstSuccessHarness { agents: vec![ 
Box::new(MockAgent::always_err("agent 1 down")), - Box::new(MockAgent::always_ok( - "agent 2 found nothing\n```json\n7\n```", - )), + Box::new(MockAgent::always_ok(&envelope("agent 2 found nothing", 7))), ], }; diff --git a/jolt-eval/tests/integration.rs b/jolt-eval/tests/integration.rs index bd29c2b67..592eb8ab0 100644 --- a/jolt-eval/tests/integration.rs +++ b/jolt-eval/tests/integration.rs @@ -141,7 +141,7 @@ fn test_measure_objectives() { let objectives: Vec> = vec![ Box::new(ConstantObjective { label: "prover_time", - value: 3.14, + value: 3.125, direction: Direction::Minimize, }), Box::new(ConstantObjective { @@ -153,6 +153,6 @@ fn test_measure_objectives() { let results = measure_dyn(&objectives); assert_eq!(results.len(), 2); - assert_eq!(results["prover_time"], 3.14); + assert_eq!(results["prover_time"], 3.125); assert_eq!(results["inline_count"], 256.0); } diff --git a/jolt-eval/tests/macro_test.rs b/jolt-eval/tests/macro_test.rs index 0439ed89c..84552be01 100644 --- a/jolt-eval/tests/macro_test.rs +++ b/jolt-eval/tests/macro_test.rs @@ -35,7 +35,9 @@ impl Invariant for AlwaysPassInvariant { // BoundsCheck: Test + Fuzz only, uses a struct Input type // --------------------------------------------------------------------------- -#[derive(Debug, Clone, jolt_eval::arbitrary::Arbitrary, serde::Serialize, serde::Deserialize)] +#[derive( + Debug, Clone, jolt_eval::arbitrary::Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema, +)] pub struct RangeInput { pub lo: u32, pub hi: u32, From 5cd0271bdfd22ad4caf7532e0b9a637aef0bcfe3 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Wed, 1 Apr 2026 13:25:28 -0400 Subject: [PATCH 15/86] docs(jolt-eval): add Fuzzing section to README, remove Crate structure Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/README.md | 62 ++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/jolt-eval/README.md b/jolt-eval/README.md index 467912710..713af3947 
100644 --- a/jolt-eval/README.md +++ b/jolt-eval/README.md @@ -89,6 +89,34 @@ cargo run --bin check-invariants -- --elf path/to/guest.elf \ --invariant soundness --num-random 100 ``` +### Fuzzing + +The `fuzz` binary runs randomized inputs (via the `Arbitrary` trait) against invariants that include `SynthesisTarget::Fuzz`: + +```bash +# Fuzz all invariants with 1000 random inputs +cargo run --bin fuzz -- --elf path/to/guest.elf --iterations 1000 + +# Fuzz a specific invariant with a time limit +cargo run --bin fuzz -- --elf path/to/guest.elf \ + --invariant soundness --duration 5m + +# List available fuzzable invariants +cargo run --bin fuzz -- --list +``` + +For deeper coverage, the `#[invariant]` macro generates a `_fuzz_check` function suitable for use with `cargo fuzz` / `libfuzzer_sys`: + +```rust +// fuzz/fuzz_targets/soundness.rs +#![no_main] +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + my_crate::my_soundness_invariant_fuzz_check(data); +}); +``` + ### Measuring objectives ```bash @@ -153,37 +181,3 @@ let seconds = obj.collect_measurement().unwrap(); | **Security review** | Try to find a counterexample to some invariant (via red-team) | -- | | **Optimization** | Ensure all invariants still hold | Maximize an objective function $f(o_1, \ldots, o_n)$ | | **Refactor** | Ensure all invariants still hold | Special case of optimization where the objective captures code quality | - -## Crate structure - -``` -jolt-eval/ - src/ - lib.rs # Type aliases, TestCase, top-level check/measure fns - invariant/ - mod.rs # Invariant trait, DynInvariant, InvariantReport - soundness.rs # Proof mutation fuzzing - completeness_verifier.rs # Honest proof acceptance - completeness_prover.rs # Prover panic detection - determinism.rs # Byte-identical proof comparison - serialization_roundtrip.rs # Serialize/deserialize equality - zk_consistency.rs # ZK mode prove+verify - synthesis/ - mod.rs # SynthesisRegistry - test.rs # #[test] generation - fuzz.rs # 
libfuzzer_sys target generation - redteam.rs # AI red-team loop with worktree isolation - objective/ - mod.rs # AbstractObjective trait, Objective enum - peak_rss.rs # Peak RSS via sysinfo - prover_time.rs # Wall-clock prover time - proof_size.rs # Serialized proof size - verifier_time.rs # Wall-clock verifier time - guest_cycles.rs # Guest cycle count via tracing - inline_lengths.rs # INLINE instruction count - wrapping_cost.rs # Constraint system size - (OptimizationAttempt type) # in mod.rs - macros/ # #[invariant(targets = [...])] proc macro - bin/ # CLI binaries - tests/ # Framework smoke tests -``` From 4fd724ed1c512915154d312a65cdd1fb1598efd1 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Wed, 1 Apr 2026 13:40:54 -0400 Subject: [PATCH 16/86] feat(jolt-eval): add cargo-fuzz scaffolding and fuzz_invariant() harness Adds fuzz/Cargo.toml and one fuzz target per built-in invariant, each a single-line call to the new `fuzz_invariant(name, data)` library function. Adding a new fuzz target is one file: #![no_main] use libfuzzer_sys::fuzz_target; fuzz_target!(|data: &[u8]| { jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("my_invariant", data); }); Run with: cd jolt-eval && JOLT_FUZZ_ELF=path/to/guest.elf cargo fuzz run soundness fuzz_invariant() loads the ELF from JOLT_FUZZ_ELF, builds the inventory-registered invariant and its setup once (LazyLock), then interprets each fuzz input as raw JSON fed through check_json_input. 
Also: - Removes the _fuzz_check codegen from the #[invariant] proc macro (redundant now that fuzz targets call the library function directly) - Removes generate_fuzz_target() string-template codegen - Adds SynthesisRegistry::into_invariants() for ownership transfer - Tightens Invariant::Setup to Send + Sync + 'static (needed for the LazyLock cache in fuzz_invariant) Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/fuzz/.gitignore | 4 + jolt-eval/fuzz/Cargo.toml | 54 +++++++ jolt-eval/fuzz/fuzz_targets/determinism.rs | 6 + .../fuzz/fuzz_targets/prover_completeness.rs | 6 + .../fuzz_targets/serialization_roundtrip.rs | 6 + jolt-eval/fuzz/fuzz_targets/soundness.rs | 6 + .../fuzz_targets/verifier_completeness.rs | 6 + jolt-eval/fuzz/fuzz_targets/zk_consistency.rs | 6 + jolt-eval/macros/src/lib.rs | 43 +----- jolt-eval/src/invariant/mod.rs | 6 +- jolt-eval/src/invariant/synthesis/fuzz.rs | 133 +++++++++++++----- jolt-eval/src/invariant/synthesis/mod.rs | 5 + jolt-eval/tests/macro_test.rs | 46 ++---- 13 files changed, 217 insertions(+), 110 deletions(-) create mode 100644 jolt-eval/fuzz/.gitignore create mode 100644 jolt-eval/fuzz/Cargo.toml create mode 100644 jolt-eval/fuzz/fuzz_targets/determinism.rs create mode 100644 jolt-eval/fuzz/fuzz_targets/prover_completeness.rs create mode 100644 jolt-eval/fuzz/fuzz_targets/serialization_roundtrip.rs create mode 100644 jolt-eval/fuzz/fuzz_targets/soundness.rs create mode 100644 jolt-eval/fuzz/fuzz_targets/verifier_completeness.rs create mode 100644 jolt-eval/fuzz/fuzz_targets/zk_consistency.rs diff --git a/jolt-eval/fuzz/.gitignore b/jolt-eval/fuzz/.gitignore new file mode 100644 index 000000000..1a45eee77 --- /dev/null +++ b/jolt-eval/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/jolt-eval/fuzz/Cargo.toml b/jolt-eval/fuzz/Cargo.toml new file mode 100644 index 000000000..a0ba8125b --- /dev/null +++ b/jolt-eval/fuzz/Cargo.toml @@ -0,0 +1,54 @@ +[package] +name = "jolt-eval-fuzz" 
+version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +jolt-eval = { path = ".." } + +[[bin]] +name = "soundness" +path = "fuzz_targets/soundness.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "verifier_completeness" +path = "fuzz_targets/verifier_completeness.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "prover_completeness" +path = "fuzz_targets/prover_completeness.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "determinism" +path = "fuzz_targets/determinism.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "serialization_roundtrip" +path = "fuzz_targets/serialization_roundtrip.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "zk_consistency" +path = "fuzz_targets/zk_consistency.rs" +test = false +doc = false +bench = false diff --git a/jolt-eval/fuzz/fuzz_targets/determinism.rs b/jolt-eval/fuzz/fuzz_targets/determinism.rs new file mode 100644 index 000000000..52f5d7e74 --- /dev/null +++ b/jolt-eval/fuzz/fuzz_targets/determinism.rs @@ -0,0 +1,6 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("determinism", data); +}); diff --git a/jolt-eval/fuzz/fuzz_targets/prover_completeness.rs b/jolt-eval/fuzz/fuzz_targets/prover_completeness.rs new file mode 100644 index 000000000..11022c0f6 --- /dev/null +++ b/jolt-eval/fuzz/fuzz_targets/prover_completeness.rs @@ -0,0 +1,6 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("prover_completeness", data); +}); diff --git a/jolt-eval/fuzz/fuzz_targets/serialization_roundtrip.rs b/jolt-eval/fuzz/fuzz_targets/serialization_roundtrip.rs new file mode 100644 index 000000000..b7c0c3a1d --- /dev/null +++ b/jolt-eval/fuzz/fuzz_targets/serialization_roundtrip.rs @@ -0,0 +1,6 @@ +#![no_main] +use 
libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("serialization_roundtrip", data); +}); diff --git a/jolt-eval/fuzz/fuzz_targets/soundness.rs b/jolt-eval/fuzz/fuzz_targets/soundness.rs new file mode 100644 index 000000000..72c1a45af --- /dev/null +++ b/jolt-eval/fuzz/fuzz_targets/soundness.rs @@ -0,0 +1,6 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("soundness", data); +}); diff --git a/jolt-eval/fuzz/fuzz_targets/verifier_completeness.rs b/jolt-eval/fuzz/fuzz_targets/verifier_completeness.rs new file mode 100644 index 000000000..589fa34c6 --- /dev/null +++ b/jolt-eval/fuzz/fuzz_targets/verifier_completeness.rs @@ -0,0 +1,6 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("verifier_completeness", data); +}); diff --git a/jolt-eval/fuzz/fuzz_targets/zk_consistency.rs b/jolt-eval/fuzz/fuzz_targets/zk_consistency.rs new file mode 100644 index 000000000..817641064 --- /dev/null +++ b/jolt-eval/fuzz/fuzz_targets/zk_consistency.rs @@ -0,0 +1,6 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("zk_consistency", data); +}); diff --git a/jolt-eval/macros/src/lib.rs b/jolt-eval/macros/src/lib.rs index 83b5e2745..55220604d 100644 --- a/jolt-eval/macros/src/lib.rs +++ b/jolt-eval/macros/src/lib.rs @@ -6,21 +6,24 @@ use syn::{parse_macro_input, DeriveInput, Ident}; /// Attribute macro for invariant structs. /// -/// Generates test and fuzz harness functions based on the specified targets. +/// Generates test harness and red-team description functions based on +/// the specified targets. 
/// /// # Usage /// /// ```ignore -/// #[jolt_eval_macros::invariant(targets = [Test, Fuzz, RedTeam])] +/// #[jolt_eval_macros::invariant(targets = [Test, RedTeam])] /// #[derive(Default)] /// pub struct MySoundnessInvariant { ... } /// ``` /// /// Generates: /// - For `Test`: A `#[cfg(test)]` module with seed corpus and random tests -/// - For `Fuzz`: A `fuzz_check` function suitable for `libfuzzer_sys` /// - For `RedTeam`: A `redteam_description` function returning the invariant's description /// +/// For `Fuzz`, use the `fuzz_invariant()` library function in a +/// `fuzz/fuzz_targets/` binary instead — see the fuzz directory. +/// /// The struct must implement `Invariant + Default`. #[proc_macro_attribute] pub fn invariant(attr: TokenStream, item: TokenStream) -> TokenStream { @@ -31,7 +34,6 @@ pub fn invariant(attr: TokenStream, item: TokenStream) -> TokenStream { let targets = parse_targets(attr); let has_test = targets.contains(&"Test".to_string()); - let has_fuzz = targets.contains(&"Fuzz".to_string()); let has_redteam = targets.contains(&"RedTeam".to_string()); let test_block = if has_test { @@ -84,38 +86,6 @@ pub fn invariant(attr: TokenStream, item: TokenStream) -> TokenStream { quote! {} }; - let fuzz_fn_name = Ident::new(&format!("{snake_name}_fuzz_check"), struct_name.span()); - let fuzz_block = if has_fuzz { - quote! 
{ - pub fn #fuzz_fn_name(data: &[u8]) { - use jolt_eval::Invariant; - use std::sync::LazyLock; - - static SETUP: LazyLock<( - #struct_name, - <#struct_name as jolt_eval::Invariant>::Setup, - )> = LazyLock::new(|| { - let invariant = #struct_name::default(); - let setup = invariant.setup(); - (invariant, setup) - }); - - let mut u = jolt_eval::arbitrary::Unstructured::new(data); - if let Ok(input) = < - <#struct_name as jolt_eval::Invariant>::Input - as jolt_eval::arbitrary::Arbitrary - >::arbitrary(&mut u) { - let (invariant, setup) = &*SETUP; - if let Err(e) = invariant.check(setup, input) { - panic!("Invariant '{}' violated: {}", invariant.name(), e); - } - } - } - } - } else { - quote! {} - }; - let redteam_fn_name = Ident::new( &format!("{snake_name}_redteam_description"), struct_name.span(), @@ -136,7 +106,6 @@ pub fn invariant(attr: TokenStream, item: TokenStream) -> TokenStream { #input #test_block - #fuzz_block #redteam_block }; diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index 583633300..67f59d154 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -67,7 +67,7 @@ impl InvariantViolation { /// `Arbitrary` for fuzzing, and `Serialize`/`DeserializeOwned` so an AI /// agent can produce counterexamples as JSON. pub trait Invariant: Send + Sync { - type Setup: 'static; + type Setup: Send + Sync + 'static; type Input: for<'a> Arbitrary<'a> + fmt::Debug + Clone + Serialize + DeserializeOwned + JsonSchema; fn name(&self) -> &str; @@ -137,7 +137,7 @@ pub trait DynInvariant: Send + Sync { fn input_json_schema(&self) -> serde_json::Value; /// Create the (type-erased) setup. Expensive — call once and reuse. - fn dyn_setup(&self) -> Box; + fn dyn_setup(&self) -> Box; /// Deserialize a JSON-encoded `Input` and check it against a /// previously-created setup (from [`dyn_setup`]). 
@@ -204,7 +204,7 @@ impl DynInvariant for I { serde_json::to_value(schema).unwrap() } - fn dyn_setup(&self) -> Box { + fn dyn_setup(&self) -> Box { Box::new(Invariant::setup(self)) } diff --git a/jolt-eval/src/invariant/synthesis/fuzz.rs b/jolt-eval/src/invariant/synthesis/fuzz.rs index 34314708a..df4e3ed14 100644 --- a/jolt-eval/src/invariant/synthesis/fuzz.rs +++ b/jolt-eval/src/invariant/synthesis/fuzz.rs @@ -1,43 +1,108 @@ -use super::super::{DynInvariant, SynthesisTarget}; +use std::sync::Arc; + +use super::super::{registered_invariants, CheckJsonResult, DynInvariant, SynthesisTarget}; use super::SynthesisRegistry; +use crate::TestCase; -/// Generate `libfuzzer_sys` fuzz target source code for a named invariant. +/// Fuzz a named invariant with raw byte data from libfuzzer. +/// +/// `data` is read as UTF-8 JSON text for the invariant's `Input` type +/// (data that is not valid UTF-8, or that is reported as bad input, +/// is ignored) and checked against the invariant. Setup is performed +/// once and cached for the process lifetime. +/// +/// Panics on invariant violation (which is what libfuzzer needs to +/// detect a finding). /// -/// The generated code should be placed in a `fuzz/fuzz_targets/` directory -/// and compiled as a separate binary with `cargo fuzz`. 
-pub fn generate_fuzz_target(_invariant_name: &str, struct_path: &str) -> String { - format!( - r#"#![no_main] -use libfuzzer_sys::fuzz_target; -use arbitrary::{{Arbitrary, Unstructured}}; -use jolt_eval::Invariant; - -// Lazily initialize the invariant and setup (expensive one-time cost) -use std::sync::LazyLock; -static SETUP: LazyLock<({struct_path}, <{struct_path} as Invariant>::Setup)> = LazyLock::new(|| {{ - let invariant = {struct_path}::default(); - let setup = invariant.setup(); - (invariant, setup) -}}); - -fuzz_target!(|data: &[u8]| {{ - let mut u = Unstructured::new(data); - if let Ok(input) = <<{struct_path} as Invariant>::Input as Arbitrary>::arbitrary(&mut u) {{ - let (invariant, setup) = &*SETUP; - // We don't panic on invariant violations during fuzzing -- - // instead we log them. The fuzzer's job is to find inputs - // that trigger violations. - if let Err(e) = invariant.check(setup, input) {{ - eprintln!("INVARIANT VIOLATION: {{}}", e); - panic!("Invariant '{{}}' violated: {{}}", invariant.name(), e); - }} - }} -}}); -"# - ) +/// # Usage in a fuzz target +/// +/// ```ignore +/// #![no_main] +/// use libfuzzer_sys::fuzz_target; +/// fuzz_target!(|data: &[u8]| { +/// jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("soundness", data); +/// }); +/// ``` +/// +/// Set `JOLT_FUZZ_ELF` to the path of a pre-compiled guest ELF before +/// running `cargo fuzz`. +pub fn fuzz_invariant(invariant_name: &str, data: &[u8]) { + use std::any::Any; + use std::sync::LazyLock; + + // One-time: build every invariant and its setup from the ELF. 
+ struct CachedInvariant { + inv: Box, + setup: Box, + } + + static CACHE: LazyLock> = LazyLock::new(|| { + let elf_path = std::env::var("JOLT_FUZZ_ELF") + .expect("Set JOLT_FUZZ_ELF to the path of a compiled guest ELF"); + let elf_bytes = std::fs::read(&elf_path) + .unwrap_or_else(|e| panic!("Failed to read {elf_path}: {e}")); + let memory_config = common::jolt_device::MemoryConfig { + max_input_size: 4096, + max_output_size: 4096, + max_untrusted_advice_size: 0, + max_trusted_advice_size: 0, + stack_size: 65536, + heap_size: 32768, + program_size: None, + }; + let test_case = Arc::new(TestCase { + elf_contents: elf_bytes, + memory_config, + max_trace_length: 65536, + }); + let registry = SynthesisRegistry::from_inventory(test_case, vec![]); + registry + .into_invariants() + .into_iter() + .map(|inv| { + let setup = inv.dyn_setup(); + CachedInvariant { inv, setup } + }) + .collect() + }); + + let cached = CACHE + .iter() + .find(|c| c.inv.name() == invariant_name) + .unwrap_or_else(|| panic!("Invariant '{invariant_name}' not found")); + + // The fuzzer-provided bytes are not run through Arbitrary here. + // They are treated as JSON text and handed to check_json_input, + // for the following reason. + // DynInvariant erases the Input type, so we can't call Arbitrary + // on the concrete type directly. Instead, interpret the fuzz data + // as a raw JSON string and feed it through check_json_input. The + // fuzzer will mutate bytes toward valid JSON that deserializes into + // the Input type — this is the standard "structure-aware via serde" + // fuzzing pattern. 
+ if let Ok(json_str) = std::str::from_utf8(data) { + match cached.inv.check_json_input(&*cached.setup, json_str) { + CheckJsonResult::Violation(e) => { + panic!( + "Invariant '{}' violated: {e}\nInput JSON: {json_str}", + cached.inv.name() + ); + } + CheckJsonResult::Pass | CheckJsonResult::BadInput(_) => {} + } + } } /// List all invariants suitable for fuzz target generation. pub fn fuzzable_invariants(registry: &SynthesisRegistry) -> Vec<&dyn DynInvariant> { registry.for_target(SynthesisTarget::Fuzz) } + +/// Return the names of all `inventory`-registered invariants that +/// include [`SynthesisTarget::Fuzz`]. +pub fn fuzzable_invariant_names() -> Vec<&'static str> { + registered_invariants() + .filter(|e| (e.targets)().contains(SynthesisTarget::Fuzz)) + .map(|e| e.name) + .collect() +} diff --git a/jolt-eval/src/invariant/synthesis/mod.rs b/jolt-eval/src/invariant/synthesis/mod.rs index 1c6d74f70..dc5630096 100644 --- a/jolt-eval/src/invariant/synthesis/mod.rs +++ b/jolt-eval/src/invariant/synthesis/mod.rs @@ -36,6 +36,11 @@ impl SynthesisRegistry { &self.invariants } + /// Consume the registry and return the invariant list. + pub fn into_invariants(self) -> Vec> { + self.invariants + } + /// Return invariants that include the given synthesis target. 
pub fn for_target(&self, target: SynthesisTarget) -> Vec<&dyn DynInvariant> { self.invariants diff --git a/jolt-eval/tests/macro_test.rs b/jolt-eval/tests/macro_test.rs index 84552be01..85d23bb66 100644 --- a/jolt-eval/tests/macro_test.rs +++ b/jolt-eval/tests/macro_test.rs @@ -2,10 +2,10 @@ use enumset::EnumSet; use jolt_eval::invariant::{Invariant, InvariantViolation, SynthesisTarget}; // --------------------------------------------------------------------------- -// AlwaysPass: exercises all three synthesis targets +// AlwaysPass: exercises Test + RedTeam synthesis targets // --------------------------------------------------------------------------- -#[jolt_eval_macros::invariant(targets = [Test, Fuzz, RedTeam])] +#[jolt_eval_macros::invariant(targets = [Test, RedTeam])] #[derive(Default)] pub struct AlwaysPassInvariant; @@ -32,18 +32,23 @@ impl Invariant for AlwaysPassInvariant { } // --------------------------------------------------------------------------- -// BoundsCheck: Test + Fuzz only, uses a struct Input type +// BoundsCheck: Test only, uses a struct Input type // --------------------------------------------------------------------------- #[derive( - Debug, Clone, jolt_eval::arbitrary::Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema, + Debug, + Clone, + jolt_eval::arbitrary::Arbitrary, + serde::Serialize, + serde::Deserialize, + schemars::JsonSchema, )] pub struct RangeInput { pub lo: u32, pub hi: u32, } -#[jolt_eval_macros::invariant(targets = [Test, Fuzz])] +#[jolt_eval_macros::invariant(targets = [Test])] #[derive(Default)] pub struct BoundsCheckInvariant; @@ -120,25 +125,6 @@ impl Invariant for RedTeamOnlyInvariant { // Tests that verify the macro-generated functions exist and work correctly // =========================================================================== -// --- Fuzz harness functions --- - -#[test] -fn fuzz_always_pass_with_various_inputs() { - // Generated by #[invariant(targets = [... 
Fuzz ...])] - always_pass_invariant_fuzz_check(&[]); - always_pass_invariant_fuzz_check(&[0]); - always_pass_invariant_fuzz_check(&[1, 2, 3, 4, 5]); - always_pass_invariant_fuzz_check(&[255; 100]); -} - -#[test] -fn fuzz_bounds_check_with_various_inputs() { - // Needs at least 8 bytes for two u32s - bounds_check_invariant_fuzz_check(&[0u8; 8]); - bounds_check_invariant_fuzz_check(&[0xFF; 8]); - bounds_check_invariant_fuzz_check(&[1, 0, 0, 0, 2, 0, 0, 0]); -} - // --- Red-team description functions --- #[test] @@ -159,18 +145,6 @@ fn redteam_only_description() { ); } -// --- Verify that no fuzz/redteam functions are generated for wrong targets --- -// (These are compile-time checks — if the functions existed, we'd get -// ambiguity or the test would compile when it shouldn't.) - -#[test] -fn redteam_only_has_no_fuzz() { - // RedTeamOnlyInvariant was declared with targets = [RedTeam], - // so `red_team_only_invariant_fuzz_check` should NOT exist. - // We can't assert "function doesn't exist" at runtime, but if this - // file compiles without calling it, the macro correctly omitted it. -} - // --- Synthesized test modules are auto-discovered by nextest --- // The #[test] functions `seed_corpus` and `random_inputs` inside the // generated `*_synthesized` modules are run automatically. We verify From 62e2ff8f7b6a254822c770d1cc07fc1230d9ad9f Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Wed, 1 Apr 2026 14:09:19 -0400 Subject: [PATCH 17/86] feat(jolt-eval): add guest catalog and --guest flag, make TestCase optional Introduces a `guests` module with a fixed catalog of known guest programs (muldiv, fibonacci, sha2, sha3, collatz, alloc), each with its memory config, max trace length, and default inputs extracted from the #[jolt::provable(...)] attributes. `GuestSpec::compile()` invokes `host::Program::new(package).build()` to cross-compile the guest to RISC-V automatically. 
All 5 CLI binaries now accept `--guest <name>` (compiles automatically) as the primary interface, with `--elf <path>` preserved as a fallback for custom ELFs. The shared `guests::resolve_test_case()` helper handles both paths. `InvariantEntry::build` and `ObjectiveEntry::build` now take `Option<Arc<TestCase>>` / `Option<&SharedSetup>` respectively, with a `needs_guest: bool` flag. `SynthesisRegistry::from_inventory` and `build_objectives_from_inventory` skip entries that need a guest when `None` is passed. This lays the groundwork for invariants/objectives that operate on lower-level primitives (e.g. polynomial binding) and don't require a compiled guest program. Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/check_invariants.rs | 47 ++--- jolt-eval/bin/fuzz.rs | 57 ++---- jolt-eval/bin/measure_objectives.rs | 53 +++-- jolt-eval/bin/optimize.rs | 55 +++--- jolt-eval/bin/redteam.rs | 57 +++--- jolt-eval/src/agent.rs | 10 +- jolt-eval/src/guests.rs | 183 ++++++++++++++++++ .../src/invariant/completeness_prover.rs | 3 +- .../src/invariant/completeness_verifier.rs | 3 +- jolt-eval/src/invariant/determinism.rs | 3 +- jolt-eval/src/invariant/mod.rs | 27 +-- .../src/invariant/serialization_roundtrip.rs | 3 +- jolt-eval/src/invariant/soundness.rs | 3 +- jolt-eval/src/invariant/synthesis/fuzz.rs | 6 +- jolt-eval/src/invariant/synthesis/mod.rs | 10 +- jolt-eval/src/invariant/synthesis/redteam.rs | 7 +- jolt-eval/src/invariant/zk_consistency.rs | 3 +- jolt-eval/src/lib.rs | 7 +- jolt-eval/src/objective/guest_cycles.rs | 5 +- jolt-eval/src/objective/inline_lengths.rs | 3 +- jolt-eval/src/objective/mod.rs | 9 +- jolt-eval/src/objective/peak_rss.rs | 5 +- jolt-eval/src/objective/proof_size.rs | 5 +- jolt-eval/src/objective/prover_time.rs | 5 +- jolt-eval/src/objective/verifier_time.rs | 5 +- jolt-eval/src/objective/wrapping_cost.rs | 5 +- jolt-eval/tests/agent_test.rs | 67 +++---- jolt-eval/tests/macro_test.rs | 10 +- 28 files changed, 405 insertions(+), 251 deletions(-) create mode 
100644 jolt-eval/src/guests.rs diff --git a/jolt-eval/bin/check_invariants.rs b/jolt-eval/bin/check_invariants.rs index 1914486d5..a8b8e328d 100644 --- a/jolt-eval/bin/check_invariants.rs +++ b/jolt-eval/bin/check_invariants.rs @@ -1,16 +1,22 @@ -use std::sync::Arc; - use clap::Parser; use tracing::info; +use jolt_eval::guests; use jolt_eval::invariant::synthesis::{invariant_names, SynthesisRegistry}; use jolt_eval::invariant::{DynInvariant, InvariantReport}; -use jolt_eval::TestCase; #[derive(Parser)] #[command(name = "check-invariants")] #[command(about = "Run Jolt invariant checks")] struct Cli { + /// Guest program to evaluate (e.g. muldiv, fibonacci, sha2) + #[arg(long)] + guest: Option, + + /// Path to a pre-compiled guest ELF (alternative to --guest) + #[arg(long)] + elf: Option, + /// Only run the named invariant (default: all) #[arg(long)] invariant: Option, @@ -19,41 +25,22 @@ struct Cli { #[arg(long, default_value = "10")] num_random: usize, - /// Path to a pre-compiled guest ELF + /// Max trace length override #[arg(long)] - elf: Option, - - /// Max trace length for the test program - #[arg(long, default_value = "65536")] - max_trace_length: usize, + max_trace_length: Option, } fn main() -> eyre::Result<()> { tracing_subscriber::fmt::init(); let cli = Cli::parse(); - let test_case = if let Some(elf_path) = &cli.elf { - let elf_bytes = std::fs::read(elf_path)?; - let memory_config = common::jolt_device::MemoryConfig { - max_input_size: 4096, - max_output_size: 4096, - max_untrusted_advice_size: 0, - max_trusted_advice_size: 0, - stack_size: 65536, - heap_size: 32768, - program_size: None, - }; - Arc::new(TestCase { - elf_contents: elf_bytes, - memory_config, - max_trace_length: cli.max_trace_length, - }) - } else { - eprintln!("Error: --elf is required. 
Provide a pre-compiled guest ELF."); - std::process::exit(1); - }; + let (test_case, default_inputs) = guests::resolve_test_case( + cli.guest.as_deref(), + cli.elf.as_deref(), + cli.max_trace_length, + ); - let registry = SynthesisRegistry::from_inventory(test_case, vec![]); + let registry = SynthesisRegistry::from_inventory(Some(test_case), default_inputs); let invariants: Vec<&dyn DynInvariant> = if let Some(name) = &cli.invariant { registry diff --git a/jolt-eval/bin/fuzz.rs b/jolt-eval/bin/fuzz.rs index c99cb798e..9155af4a7 100644 --- a/jolt-eval/bin/fuzz.rs +++ b/jolt-eval/bin/fuzz.rs @@ -1,16 +1,23 @@ -use std::sync::Arc; use std::time::{Duration, Instant}; use clap::Parser; +use jolt_eval::guests; use jolt_eval::invariant::synthesis::{invariant_names, SynthesisRegistry}; use jolt_eval::invariant::{DynInvariant, InvariantReport, SynthesisTarget}; -use jolt_eval::TestCase; #[derive(Parser)] #[command(name = "fuzz")] #[command(about = "Fuzz-test Jolt invariants with random inputs")] struct Cli { + /// Guest program to evaluate (e.g. 
muldiv, fibonacci, sha2) + #[arg(long)] + guest: Option, + + /// Path to a pre-compiled guest ELF (alternative to --guest) + #[arg(long)] + elf: Option, + /// Only fuzz the named invariant (default: all fuzzable) #[arg(long)] invariant: Option, @@ -23,17 +30,9 @@ struct Cli { #[arg(long)] duration: Option, - /// Size of random byte buffer fed to Arbitrary (bytes) - #[arg(long, default_value = "4096")] - input_size: usize, - - /// Path to a pre-compiled guest ELF + /// Max trace length override #[arg(long)] - elf: Option, - - /// Max trace length for the test program - #[arg(long, default_value = "65536")] - max_trace_length: usize, + max_trace_length: Option, /// List available fuzzable invariants and exit #[arg(long)] @@ -52,28 +51,13 @@ fn main() -> eyre::Result<()> { return Ok(()); } - let test_case = if let Some(elf_path) = &cli.elf { - let elf_bytes = std::fs::read(elf_path)?; - let memory_config = common::jolt_device::MemoryConfig { - max_input_size: 4096, - max_output_size: 4096, - max_untrusted_advice_size: 0, - max_trusted_advice_size: 0, - stack_size: 65536, - heap_size: 32768, - program_size: None, - }; - Arc::new(TestCase { - elf_contents: elf_bytes, - memory_config, - max_trace_length: cli.max_trace_length, - }) - } else { - eprintln!("Error: --elf is required. 
Provide a pre-compiled guest ELF."); - std::process::exit(1); - }; + let (test_case, default_inputs) = guests::resolve_test_case( + cli.guest.as_deref(), + cli.elf.as_deref(), + cli.max_trace_length, + ); - let registry = SynthesisRegistry::from_inventory(test_case, vec![]); + let registry = SynthesisRegistry::from_inventory(Some(test_case), default_inputs); let fuzzable: Vec<&dyn DynInvariant> = if let Some(name) = &cli.invariant { let matches: Vec<_> = registry @@ -105,10 +89,9 @@ fn main() -> eyre::Result<()> { }); println!( - "Fuzzing {} invariant(s), {} iterations, input size {} bytes", + "Fuzzing {} invariant(s), {} iterations", fuzzable.len(), cli.iterations, - cli.input_size, ); if let Some(d) = &cli.duration { println!("Time limit: {d}"); @@ -204,9 +187,7 @@ fn parse_duration(s: &str) -> Option { } else if let Some(n) = s.strip_suffix('m') { n.parse::().ok().map(|m| Duration::from_secs(m * 60)) } else if let Some(n) = s.strip_suffix('h') { - n.parse::() - .ok() - .map(|h| Duration::from_secs(h * 3600)) + n.parse::().ok().map(|h| Duration::from_secs(h * 3600)) } else { s.parse::().ok().map(Duration::from_secs) } diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs index 67ae35731..365322f54 100644 --- a/jolt-eval/bin/measure_objectives.rs +++ b/jolt-eval/bin/measure_objectives.rs @@ -1,12 +1,21 @@ use clap::Parser; +use jolt_eval::guests; use jolt_eval::objective::{build_objectives_from_inventory, registered_objectives}; -use jolt_eval::{SharedSetup, TestCase}; +use jolt_eval::SharedSetup; #[derive(Parser)] #[command(name = "measure-objectives")] #[command(about = "Measure Jolt performance objectives")] struct Cli { + /// Guest program to evaluate (e.g. 
muldiv, fibonacci, sha2) + #[arg(long)] + guest: Option, + + /// Path to a pre-compiled guest ELF (alternative to --guest) + #[arg(long)] + elf: Option, + /// Only measure the named objective (default: all) #[arg(long)] objective: Option, @@ -15,42 +24,23 @@ struct Cli { #[arg(long)] samples: Option, - /// Path to a pre-compiled guest ELF + /// Max trace length override #[arg(long)] - elf: Option, - - /// Max trace length for the test program - #[arg(long, default_value = "65536")] - max_trace_length: usize, + max_trace_length: Option, } fn main() -> eyre::Result<()> { tracing_subscriber::fmt::init(); let cli = Cli::parse(); - let test_case = if let Some(elf_path) = &cli.elf { - let elf_bytes = std::fs::read(elf_path)?; - let memory_config = common::jolt_device::MemoryConfig { - max_input_size: 4096, - max_output_size: 4096, - max_untrusted_advice_size: 0, - max_trusted_advice_size: 0, - stack_size: 65536, - heap_size: 32768, - program_size: None, - }; - TestCase { - elf_contents: elf_bytes, - memory_config, - max_trace_length: cli.max_trace_length, - } - } else { - eprintln!("Error: --elf is required. Provide a pre-compiled guest ELF."); - std::process::exit(1); - }; + let (test_case, default_inputs) = guests::resolve_test_case( + cli.guest.as_deref(), + cli.elf.as_deref(), + cli.max_trace_length, + ); - let setup = SharedSetup::new(test_case); - let objectives = build_objectives_from_inventory(&setup, vec![]); + let setup = SharedSetup::new_from_arc(test_case); + let objectives = build_objectives_from_inventory(Some(&setup), default_inputs); let filtered: Vec<_> = if let Some(name) = &cli.objective { objectives @@ -63,7 +53,10 @@ fn main() -> eyre::Result<()> { if filtered.is_empty() { let all_names: Vec<_> = registered_objectives().map(|e| e.name).collect(); - eprintln!("No matching objectives. Available: {}", all_names.join(", ")); + eprintln!( + "No matching objectives. 
Available: {}", + all_names.join(", ") + ); std::process::exit(1); } diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 5c92be781..3943490c1 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -1,21 +1,29 @@ use std::collections::HashMap; use std::process::Command; -use std::sync::Arc; use clap::Parser; use jolt_eval::agent::ClaudeCodeAgent; +use jolt_eval::guests; use jolt_eval::invariant::synthesis::SynthesisRegistry; use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; use jolt_eval::objective::{ build_objectives_from_inventory, measure_dyn, AbstractObjective, Direction, }; -use jolt_eval::{SharedSetup, TestCase}; +use jolt_eval::SharedSetup; #[derive(Parser)] #[command(name = "optimize")] #[command(about = "AI-driven optimization of Jolt objectives")] struct Cli { + /// Guest program to evaluate (e.g. muldiv, fibonacci, sha2) + #[arg(long)] + guest: Option, + + /// Path to a pre-compiled guest ELF (alternative to --guest) + #[arg(long)] + elf: Option, + /// Objectives to optimize (comma-separated). Default: all. 
#[arg(long)] objectives: Option, @@ -28,14 +36,6 @@ struct Cli { #[arg(long, default_value = "claude-sonnet-4-20250514")] model: String, - /// Path to a pre-compiled guest ELF - #[arg(long)] - elf: String, - - /// Max trace length for the test program - #[arg(long, default_value = "65536")] - max_trace_length: usize, - /// Maximum number of Claude agentic turns per iteration #[arg(long, default_value = "30")] max_turns: usize, @@ -43,6 +43,10 @@ struct Cli { /// Extra context to include in the optimization prompt #[arg(long)] hint: Option, + + /// Max trace length override + #[arg(long)] + max_trace_length: Option, } struct RealEnv { @@ -102,24 +106,14 @@ fn main() -> eyre::Result<()> { tracing_subscriber::fmt::init(); let cli = Cli::parse(); - let elf_bytes = std::fs::read(&cli.elf)?; - let memory_config = common::jolt_device::MemoryConfig { - max_input_size: 4096, - max_output_size: 4096, - max_untrusted_advice_size: 0, - max_trusted_advice_size: 0, - stack_size: 65536, - heap_size: 32768, - program_size: None, - }; - let test_case = TestCase { - elf_contents: elf_bytes, - memory_config, - max_trace_length: cli.max_trace_length, - }; + let (test_case, default_inputs) = guests::resolve_test_case( + cli.guest.as_deref(), + cli.elf.as_deref(), + cli.max_trace_length, + ); - let setup = SharedSetup::new(test_case); - let all_objectives = build_objectives_from_inventory(&setup, vec![]); + let setup = SharedSetup::new_from_arc(test_case.clone()); + let all_objectives = build_objectives_from_inventory(Some(&setup), default_inputs.clone()); let all_names: Vec = all_objectives .iter() .map(|o| o.name().to_string()) @@ -147,12 +141,7 @@ fn main() -> eyre::Result<()> { std::process::exit(1); } - let test_case2 = TestCase { - elf_contents: std::fs::read(&cli.elf)?, - memory_config, - max_trace_length: cli.max_trace_length, - }; - let registry = SynthesisRegistry::from_inventory(Arc::new(test_case2), vec![]); + let registry = SynthesisRegistry::from_inventory(Some(test_case), 
default_inputs); let repo_dir = std::env::current_dir()?; let mut env = RealEnv { diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index 85f5b8ae5..344ff3a56 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -1,18 +1,24 @@ -use std::sync::Arc; - use clap::Parser; use tracing::info; use jolt_eval::agent::ClaudeCodeAgent; +use jolt_eval::guests; use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; use jolt_eval::invariant::synthesis::{invariant_names, SynthesisRegistry}; use jolt_eval::invariant::SynthesisTarget; -use jolt_eval::TestCase; #[derive(Parser)] #[command(name = "redteam")] #[command(about = "AI-driven red team testing of Jolt invariants")] struct Cli { + /// Guest program to evaluate (e.g. muldiv, fibonacci, sha2) + #[arg(long)] + guest: Option, + + /// Path to a pre-compiled guest ELF (alternative to --guest) + #[arg(long)] + elf: Option, + /// Name of the invariant to test #[arg(long)] invariant: String, @@ -25,18 +31,14 @@ struct Cli { #[arg(long, default_value = "claude-sonnet-4-20250514")] model: String, - /// Path to a pre-compiled guest ELF - #[arg(long)] - elf: String, - - /// Max trace length for the test program - #[arg(long, default_value = "65536")] - max_trace_length: usize, - /// Maximum number of Claude agentic turns per iteration #[arg(long, default_value = "30")] max_turns: usize, + /// Max trace length override + #[arg(long)] + max_trace_length: Option, + /// List available red-teamable invariants and exit #[arg(long)] list: bool, @@ -54,23 +56,13 @@ fn main() -> eyre::Result<()> { return Ok(()); } - let elf_bytes = std::fs::read(&cli.elf)?; - let memory_config = common::jolt_device::MemoryConfig { - max_input_size: 4096, - max_output_size: 4096, - max_untrusted_advice_size: 0, - max_trusted_advice_size: 0, - stack_size: 65536, - heap_size: 32768, - program_size: None, - }; - let test_case = Arc::new(TestCase { - elf_contents: elf_bytes, - memory_config, - 
max_trace_length: cli.max_trace_length, - }); + let (test_case, default_inputs) = guests::resolve_test_case( + cli.guest.as_deref(), + cli.elf.as_deref(), + cli.max_trace_length, + ); - let registry = SynthesisRegistry::from_inventory(test_case, vec![]); + let registry = SynthesisRegistry::from_inventory(Some(test_case), default_inputs); let invariant = registry .for_target(SynthesisTarget::RedTeam) @@ -78,7 +70,10 @@ fn main() -> eyre::Result<()> { .find(|inv| inv.name() == cli.invariant.as_str()); let Some(invariant) = invariant else { - eprintln!("Invariant '{}' not found or not red-teamable.", cli.invariant); + eprintln!( + "Invariant '{}' not found or not red-teamable.", + cli.invariant + ); eprintln!("Run with --list to see available invariants."); std::process::exit(1); }; @@ -86,7 +81,6 @@ fn main() -> eyre::Result<()> { let config = RedTeamConfig { num_iterations: cli.iterations, }; - let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); let repo_dir = std::env::current_dir()?; @@ -112,10 +106,7 @@ fn main() -> eyre::Result<()> { } RedTeamResult::NoViolation { attempts } => { println!(); - println!( - "No violations found after {} iterations.", - attempts.len() - ); + println!("No violations found after {} iterations.", attempts.len()); for attempt in &attempts { println!( " {}: {} -- {}", diff --git a/jolt-eval/src/agent.rs b/jolt-eval/src/agent.rs index e7a65304f..6e1270522 100644 --- a/jolt-eval/src/agent.rs +++ b/jolt-eval/src/agent.rs @@ -201,9 +201,8 @@ impl AgentHarness for ClaudeCodeAgent { } // Parse the CLI JSON envelope and extract structured_output - let envelope: serde_json::Value = serde_json::from_str(&stdout).map_err(|e| { - AgentError::new(format!("failed to parse CLI JSON envelope: {e}")) - })?; + let envelope: serde_json::Value = serde_json::from_str(&stdout) + .map_err(|e| AgentError::new(format!("failed to parse CLI JSON envelope: {e}")))?; let text = if let Some(structured) = envelope.get("structured_output") { 
serde_json::to_string(structured) @@ -308,10 +307,7 @@ impl MockAgent { pub fn always_ok(text: &str) -> Self { let text = text.to_string(); Self { - responses: std::sync::Mutex::new(vec![Ok(AgentResponse { - text, - diff: None, - })]), + responses: std::sync::Mutex::new(vec![Ok(AgentResponse { text, diff: None })]), prompts: std::sync::Mutex::new(Vec::new()), } } diff --git a/jolt-eval/src/guests.rs b/jolt-eval/src/guests.rs new file mode 100644 index 000000000..3f496ce14 --- /dev/null +++ b/jolt-eval/src/guests.rs @@ -0,0 +1,183 @@ +use std::sync::Arc; + +use common::jolt_device::MemoryConfig; + +use crate::TestCase; + +/// A known guest program that jolt-eval can compile and run. +pub struct GuestSpec { + /// Cargo package name of the guest crate (e.g. "muldiv-guest"). + pub package: &'static str, + /// Short name used in CLI `--guest` flags. + pub name: &'static str, + pub heap_size: u64, + pub stack_size: u64, + pub max_input_size: u64, + pub max_output_size: u64, + pub max_trace_length: usize, + /// Default inputs to serialize and pass to the guest program. + pub default_inputs: fn() -> Vec, +} + +impl GuestSpec { + pub fn memory_config(&self) -> MemoryConfig { + MemoryConfig { + max_input_size: self.max_input_size, + max_output_size: self.max_output_size, + max_untrusted_advice_size: 0, + max_trusted_advice_size: 0, + stack_size: self.stack_size, + heap_size: self.heap_size, + program_size: None, + } + } + + /// Compile the guest and return a `TestCase`. + /// + /// Invokes the `jolt` CLI to cross-compile the guest crate to + /// RISC-V, then wraps the resulting ELF bytes in a `TestCase`. 
+ pub fn compile(&self, target_dir: &str) -> TestCase { + let mut program = jolt_core::host::Program::new(self.package); + program.set_memory_config(self.memory_config()); + program.build(target_dir); + let elf_bytes = program + .get_elf_contents() + .expect("guest ELF not found after build"); + TestCase { + elf_contents: elf_bytes, + memory_config: self.memory_config(), + max_trace_length: self.max_trace_length, + } + } +} + +/// The fixed catalog of guest programs available for evaluation. +/// +/// Modeled after the benchmark suite in `jolt-core/benches/e2e_profiling.rs`. +/// Each entry carries the memory config and default inputs extracted from +/// the `#[jolt::provable(...)]` attributes in the guest crate. +pub static GUESTS: &[GuestSpec] = &[ + GuestSpec { + package: "muldiv-guest", + name: "muldiv", + heap_size: 32768, + stack_size: 4096, + max_input_size: 4096, + max_output_size: 4096, + max_trace_length: 65536, + default_inputs: || postcard::to_stdvec(&(12031293u32, 17u32, 92u32)).unwrap(), + }, + GuestSpec { + package: "fibonacci-guest", + name: "fibonacci", + heap_size: 32768, + stack_size: 4096, + max_input_size: 4096, + max_output_size: 4096, + max_trace_length: 65536, + default_inputs: || postcard::to_stdvec(&100u32).unwrap(), + }, + GuestSpec { + package: "sha2-guest", + name: "sha2", + heap_size: 32768, + stack_size: 4096, + max_input_size: 4096, + max_output_size: 4096, + max_trace_length: 65536, + default_inputs: || postcard::to_stdvec(&vec![5u8; 32]).unwrap(), + }, + GuestSpec { + package: "sha3-guest", + name: "sha3", + heap_size: 32768, + stack_size: 4096, + max_input_size: 4096, + max_output_size: 4096, + max_trace_length: 65536, + default_inputs: || postcard::to_stdvec(&vec![5u8; 32]).unwrap(), + }, + GuestSpec { + package: "collatz-guest", + name: "collatz", + heap_size: 32768, + stack_size: 4096, + max_input_size: 4096, + max_output_size: 4096, + max_trace_length: 1048576, + default_inputs: || postcard::to_stdvec(&19u32).unwrap(), + }, + 
GuestSpec { + package: "alloc-guest", + name: "alloc", + heap_size: 32768, + stack_size: 4096, + max_input_size: 4096, + max_output_size: 4096, + max_trace_length: 65536, + default_inputs: Vec::new, + }, +]; + +/// Look up a guest by its short name. +pub fn find_guest(name: &str) -> Option<&'static GuestSpec> { + GUESTS.iter().find(|g| g.name == name) +} + +/// Return the short names of all known guests. +pub fn guest_names() -> Vec<&'static str> { + GUESTS.iter().map(|g| g.name).collect() +} + +/// Resolve a `TestCase` from either `--guest ` or `--elf `. +/// +/// If `guest` is `Some`, compiles the named guest. If `elf` is `Some`, +/// reads the ELF from disk with a default memory config. Exits the +/// process with a helpful message if neither is provided. +pub fn resolve_test_case( + guest: Option<&str>, + elf: Option<&str>, + max_trace_length_override: Option, +) -> (Arc, Vec) { + if let Some(name) = guest { + let spec = find_guest(name).unwrap_or_else(|| { + eprintln!( + "Unknown guest '{name}'. 
Available: {}", + guest_names().join(", ") + ); + std::process::exit(1); + }); + let mut tc = spec.compile("/tmp/jolt-guest-targets"); + if let Some(mtl) = max_trace_length_override { + tc.max_trace_length = mtl; + } + let inputs = (spec.default_inputs)(); + (Arc::new(tc), inputs) + } else if let Some(path) = elf { + let elf_bytes = std::fs::read(path).unwrap_or_else(|e| { + eprintln!("Failed to read ELF {path}: {e}"); + std::process::exit(1); + }); + let tc = TestCase { + elf_contents: elf_bytes, + memory_config: MemoryConfig { + max_input_size: 4096, + max_output_size: 4096, + max_untrusted_advice_size: 0, + max_trusted_advice_size: 0, + stack_size: 65536, + heap_size: 32768, + program_size: None, + }, + max_trace_length: max_trace_length_override.unwrap_or(65536), + }; + (Arc::new(tc), vec![]) + } else { + eprintln!( + "Provide either --guest or --elf .\n\ + Available guests: {}", + guest_names().join(", ") + ); + std::process::exit(1); + } +} diff --git a/jolt-eval/src/invariant/completeness_prover.rs b/jolt-eval/src/invariant/completeness_prover.rs index 16d14627d..39acc4f82 100644 --- a/jolt-eval/src/invariant/completeness_prover.rs +++ b/jolt-eval/src/invariant/completeness_prover.rs @@ -10,7 +10,8 @@ inventory::submit! { InvariantEntry { name: "prover_completeness", targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, - build: |tc, _inputs| Box::new(ProverCompletenessInvariant::new(tc)), + needs_guest: true, + build: |tc, _inputs| Box::new(ProverCompletenessInvariant::new(tc.unwrap())), } } diff --git a/jolt-eval/src/invariant/completeness_verifier.rs b/jolt-eval/src/invariant/completeness_verifier.rs index 07d2728db..89df6f626 100644 --- a/jolt-eval/src/invariant/completeness_verifier.rs +++ b/jolt-eval/src/invariant/completeness_verifier.rs @@ -10,7 +10,8 @@ inventory::submit! 
{ InvariantEntry { name: "verifier_completeness", targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, - build: |tc, _inputs| Box::new(VerifierCompletenessInvariant::new(tc)), + needs_guest: true, + build: |tc, _inputs| Box::new(VerifierCompletenessInvariant::new(tc.unwrap())), } } diff --git a/jolt-eval/src/invariant/determinism.rs b/jolt-eval/src/invariant/determinism.rs index 9e53f61af..993ebb140 100644 --- a/jolt-eval/src/invariant/determinism.rs +++ b/jolt-eval/src/invariant/determinism.rs @@ -10,7 +10,8 @@ inventory::submit! { InvariantEntry { name: "determinism", targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, - build: |tc, _inputs| Box::new(DeterminismInvariant::new(tc)), + needs_guest: true, + build: |tc, _inputs| Box::new(DeterminismInvariant::new(tc.unwrap())), } } diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index 67f59d154..277ec76f8 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -68,7 +68,12 @@ impl InvariantViolation { /// agent can produce counterexamples as JSON. pub trait Invariant: Send + Sync { type Setup: Send + Sync + 'static; - type Input: for<'a> Arbitrary<'a> + fmt::Debug + Clone + Serialize + DeserializeOwned + JsonSchema; + type Input: for<'a> Arbitrary<'a> + + fmt::Debug + + Clone + + Serialize + + DeserializeOwned + + JsonSchema; fn name(&self) -> &str; @@ -94,10 +99,16 @@ pub trait Invariant: Send + Sync { /// Each built-in invariant module calls `inventory::submit!` with one of /// these, so all invariants are discoverable at runtime without manual /// registration. +/// Factory function type for constructing an invariant from an optional +/// test case and default inputs. +pub type InvariantBuildFn = fn(Option>, Vec) -> Box; + pub struct InvariantEntry { pub name: &'static str, pub targets: fn() -> EnumSet, - pub build: fn(Arc, Vec) -> Box, + /// Whether this invariant requires a compiled guest program. 
+ pub needs_guest: bool, + pub build: InvariantBuildFn, } inventory::collect!(InvariantEntry); @@ -141,11 +152,7 @@ pub trait DynInvariant: Send + Sync { /// Deserialize a JSON-encoded `Input` and check it against a /// previously-created setup (from [`dyn_setup`]). - fn check_json_input( - &self, - setup: &dyn Any, - json: &str, - ) -> CheckJsonResult; + fn check_json_input(&self, setup: &dyn Any, json: &str) -> CheckJsonResult; } /// Outcome of [`DynInvariant::check_json_input`]. @@ -208,11 +215,7 @@ impl DynInvariant for I { Box::new(Invariant::setup(self)) } - fn check_json_input( - &self, - setup: &dyn Any, - json: &str, - ) -> CheckJsonResult { + fn check_json_input(&self, setup: &dyn Any, json: &str) -> CheckJsonResult { let setup = setup .downcast_ref::() .expect("DynInvariant::check_json_input called with wrong setup type"); diff --git a/jolt-eval/src/invariant/serialization_roundtrip.rs b/jolt-eval/src/invariant/serialization_roundtrip.rs index 8c235e24f..9210e06fa 100644 --- a/jolt-eval/src/invariant/serialization_roundtrip.rs +++ b/jolt-eval/src/invariant/serialization_roundtrip.rs @@ -10,7 +10,8 @@ inventory::submit! { InvariantEntry { name: "serialization_roundtrip", targets: || { SynthesisTarget::Test.into() }, - build: |tc, inputs| Box::new(SerializationRoundtripInvariant::new(tc, inputs)), + needs_guest: true, + build: |tc, inputs| Box::new(SerializationRoundtripInvariant::new(tc.unwrap(), inputs)), } } diff --git a/jolt-eval/src/invariant/soundness.rs b/jolt-eval/src/invariant/soundness.rs index e18a7ffc0..80e7eb7d3 100644 --- a/jolt-eval/src/invariant/soundness.rs +++ b/jolt-eval/src/invariant/soundness.rs @@ -10,7 +10,8 @@ inventory::submit! 
{ InvariantEntry { name: "soundness", targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz | SynthesisTarget::RedTeam, - build: |tc, inputs| Box::new(SoundnessInvariant::new(tc, inputs)), + needs_guest: true, + build: |tc, inputs| Box::new(SoundnessInvariant::new(tc.unwrap(), inputs)), } } diff --git a/jolt-eval/src/invariant/synthesis/fuzz.rs b/jolt-eval/src/invariant/synthesis/fuzz.rs index df4e3ed14..8fc27b100 100644 --- a/jolt-eval/src/invariant/synthesis/fuzz.rs +++ b/jolt-eval/src/invariant/synthesis/fuzz.rs @@ -39,8 +39,8 @@ pub fn fuzz_invariant(invariant_name: &str, data: &[u8]) { static CACHE: LazyLock> = LazyLock::new(|| { let elf_path = std::env::var("JOLT_FUZZ_ELF") .expect("Set JOLT_FUZZ_ELF to the path of a compiled guest ELF"); - let elf_bytes = std::fs::read(&elf_path) - .unwrap_or_else(|e| panic!("Failed to read {elf_path}: {e}")); + let elf_bytes = + std::fs::read(&elf_path).unwrap_or_else(|e| panic!("Failed to read {elf_path}: {e}")); let memory_config = common::jolt_device::MemoryConfig { max_input_size: 4096, max_output_size: 4096, @@ -55,7 +55,7 @@ pub fn fuzz_invariant(invariant_name: &str, data: &[u8]) { memory_config, max_trace_length: 65536, }); - let registry = SynthesisRegistry::from_inventory(test_case, vec![]); + let registry = SynthesisRegistry::from_inventory(Some(test_case), vec![]); registry .into_invariants() .into_iter() diff --git a/jolt-eval/src/invariant/synthesis/mod.rs b/jolt-eval/src/invariant/synthesis/mod.rs index dc5630096..70c875090 100644 --- a/jolt-eval/src/invariant/synthesis/mod.rs +++ b/jolt-eval/src/invariant/synthesis/mod.rs @@ -20,10 +20,16 @@ impl SynthesisRegistry { } /// Build a registry from all `inventory`-registered invariants. - pub fn from_inventory(test_case: Arc, default_inputs: Vec) -> Self { + /// + /// Pass `None` to include only invariants that don't require a guest + /// program (those with `needs_guest: false`). 
+ pub fn from_inventory(test_case: Option>, default_inputs: Vec) -> Self { let mut registry = Self::new(); for entry in registered_invariants() { - registry.register((entry.build)(Arc::clone(&test_case), default_inputs.clone())); + if entry.needs_guest && test_case.is_none() { + continue; + } + registry.register((entry.build)(test_case.clone(), default_inputs.clone())); } registry } diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index 0f90b3b65..133d6b4f6 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -62,8 +62,7 @@ pub fn auto_redteam( invariant.name() ); - let prompt = - build_redteam_prompt(&description, input_example.as_deref(), &failed_attempts); + let prompt = build_redteam_prompt(&description, input_example.as_deref(), &failed_attempts); let response = match agent.invoke_structured(repo_dir, &prompt, &envelope_schema) { Ok(r) => r, @@ -92,8 +91,8 @@ pub fn auto_redteam( failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), approach: response.text, - failure_reason: - "Agent response did not contain a JSON counterexample".to_string(), + failure_reason: "Agent response did not contain a JSON counterexample" + .to_string(), }); continue; } diff --git a/jolt-eval/src/invariant/zk_consistency.rs b/jolt-eval/src/invariant/zk_consistency.rs index f818273ee..e0aded252 100644 --- a/jolt-eval/src/invariant/zk_consistency.rs +++ b/jolt-eval/src/invariant/zk_consistency.rs @@ -10,7 +10,8 @@ inventory::submit! 
{ InvariantEntry { name: "zk_consistency", targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, - build: |tc, _inputs| Box::new(ZkConsistencyInvariant::new(tc)), + needs_guest: true, + build: |tc, _inputs| Box::new(ZkConsistencyInvariant::new(tc.unwrap())), } } diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index 27e982bcf..39688ca8a 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -1,6 +1,7 @@ #![allow(non_snake_case)] pub mod agent; +pub mod guests; pub mod invariant; pub mod objective; @@ -160,10 +161,14 @@ pub struct SharedSetup { impl SharedSetup { pub fn new(test_case: TestCase) -> Self { + Self::new_from_arc(Arc::new(test_case)) + } + + pub fn new_from_arc(test_case: Arc) -> Self { let prover_pp = test_case.prover_preprocessing(); let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); Self { - test_case: Arc::new(test_case), + test_case, prover_preprocessing: Arc::new(prover_pp), verifier_preprocessing: Arc::new(verifier_pp), } diff --git a/jolt-eval/src/objective/guest_cycles.rs b/jolt-eval/src/objective/guest_cycles.rs index 5fcd4f269..c47694621 100644 --- a/jolt-eval/src/objective/guest_cycles.rs +++ b/jolt-eval/src/objective/guest_cycles.rs @@ -7,9 +7,10 @@ inventory::submit! { ObjectiveEntry { name: "guest_cycle_count", direction: Direction::Minimize, - build: |setup, inputs| Box::new(GuestCycleCountObjective::new( + needs_guest: true, + build: |s, inputs| { let setup = s.unwrap(); Box::new(GuestCycleCountObjective::new( setup.test_case.clone(), inputs, - )), + )) }, } } diff --git a/jolt-eval/src/objective/inline_lengths.rs b/jolt-eval/src/objective/inline_lengths.rs index 50ccd446c..6eee4e5f5 100644 --- a/jolt-eval/src/objective/inline_lengths.rs +++ b/jolt-eval/src/objective/inline_lengths.rs @@ -7,7 +7,8 @@ inventory::submit! 
{ ObjectiveEntry { name: "inline_lengths", direction: Direction::Maximize, - build: |setup, _inputs| Box::new(InlineLengthsObjective::new(setup.test_case.clone())), + needs_guest: true, + build: |s, _inputs| { let setup = s.unwrap(); Box::new(InlineLengthsObjective::new(setup.test_case.clone())) }, } } diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 8f1c16f9a..259940887 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -68,7 +68,9 @@ pub trait AbstractObjective: Send + Sync { pub struct ObjectiveEntry { pub name: &'static str, pub direction: Direction, - pub build: fn(&SharedSetup, Vec) -> Box, + /// Whether this objective requires a compiled guest program. + pub needs_guest: bool, + pub build: fn(Option<&SharedSetup>, Vec) -> Box, } inventory::collect!(ObjectiveEntry); @@ -78,11 +80,14 @@ pub fn registered_objectives() -> impl Iterator } /// Build all registered objectives from a [`SharedSetup`]. +/// +/// Pass `None` to include only objectives that don't require a guest. pub fn build_objectives_from_inventory( - setup: &SharedSetup, + setup: Option<&SharedSetup>, inputs: Vec, ) -> Vec> { inventory::iter::() + .filter(|entry| !entry.needs_guest || setup.is_some()) .map(|entry| (entry.build)(setup, inputs.clone())) .collect() } diff --git a/jolt-eval/src/objective/peak_rss.rs b/jolt-eval/src/objective/peak_rss.rs index 84af1da60..d8d8577aa 100644 --- a/jolt-eval/src/objective/peak_rss.rs +++ b/jolt-eval/src/objective/peak_rss.rs @@ -9,9 +9,10 @@ inventory::submit! 
{ ObjectiveEntry { name: "peak_rss", direction: Direction::Minimize, - build: |setup, inputs| Box::new(PeakRssObjective::new( + needs_guest: true, + build: |s, inputs| { let setup = s.unwrap(); Box::new(PeakRssObjective::new( setup.test_case.clone(), setup.prover_preprocessing.clone(), inputs, - )), + )) }, } } diff --git a/jolt-eval/src/objective/proof_size.rs b/jolt-eval/src/objective/proof_size.rs index b8214a643..321d950c5 100644 --- a/jolt-eval/src/objective/proof_size.rs +++ b/jolt-eval/src/objective/proof_size.rs @@ -7,9 +7,10 @@ inventory::submit! { ObjectiveEntry { name: "proof_size", direction: Direction::Minimize, - build: |setup, inputs| Box::new(ProofSizeObjective::new( + needs_guest: true, + build: |s, inputs| { let setup = s.unwrap(); Box::new(ProofSizeObjective::new( setup.test_case.clone(), setup.prover_preprocessing.clone(), inputs, - )), + )) }, } } diff --git a/jolt-eval/src/objective/prover_time.rs b/jolt-eval/src/objective/prover_time.rs index c29c61e4d..b083a3696 100644 --- a/jolt-eval/src/objective/prover_time.rs +++ b/jolt-eval/src/objective/prover_time.rs @@ -8,9 +8,10 @@ inventory::submit! { ObjectiveEntry { name: "prover_time", direction: Direction::Minimize, - build: |setup, inputs| Box::new(ProverTimeObjective::new( + needs_guest: true, + build: |s, inputs| { let setup = s.unwrap(); Box::new(ProverTimeObjective::new( setup.test_case.clone(), setup.prover_preprocessing.clone(), inputs, - )), + )) }, } } diff --git a/jolt-eval/src/objective/verifier_time.rs b/jolt-eval/src/objective/verifier_time.rs index 34e76c443..4936799c6 100644 --- a/jolt-eval/src/objective/verifier_time.rs +++ b/jolt-eval/src/objective/verifier_time.rs @@ -8,10 +8,11 @@ inventory::submit! 
{ ObjectiveEntry { name: "verifier_time", direction: Direction::Minimize, - build: |setup, inputs| Box::new(VerifierTimeObjective::new( + needs_guest: true, + build: |s, inputs| { let setup = s.unwrap(); Box::new(VerifierTimeObjective::new( setup.test_case.clone(), setup.prover_preprocessing.clone(), setup.verifier_preprocessing.clone(), inputs, - )), + )) }, } } diff --git a/jolt-eval/src/objective/wrapping_cost.rs b/jolt-eval/src/objective/wrapping_cost.rs index 3a5eef453..972201a90 100644 --- a/jolt-eval/src/objective/wrapping_cost.rs +++ b/jolt-eval/src/objective/wrapping_cost.rs @@ -7,9 +7,10 @@ inventory::submit! { ObjectiveEntry { name: "wrapping_cost", direction: Direction::Minimize, - build: |setup, _inputs| Box::new(WrappingCostObjective::new( + needs_guest: true, + build: |s, _inputs| { let setup = s.unwrap(); Box::new(WrappingCostObjective::new( setup.test_case.clone(), setup.prover_preprocessing.clone(), - )), + )) }, } } diff --git a/jolt-eval/tests/agent_test.rs b/jolt-eval/tests/agent_test.rs index 6a4595a87..714733454 100644 --- a/jolt-eval/tests/agent_test.rs +++ b/jolt-eval/tests/agent_test.rs @@ -330,7 +330,9 @@ fn redteam_handles_no_json_in_response() { match result { RedTeamResult::NoViolation { attempts } => { assert_eq!(attempts.len(), 1); - assert!(attempts[0].failure_reason.contains("did not contain a JSON")); + assert!(attempts[0] + .failure_reason + .contains("did not contain a JSON")); } _ => panic!("Expected NoViolation"), } @@ -538,9 +540,7 @@ fn custom_harness_plugs_into_auto_redteam() { }; let invariant = AlwaysPassInvariant; - let config = RedTeamConfig { - num_iterations: 2, - }; + let config = RedTeamConfig { num_iterations: 2 }; let result = auto_redteam(&invariant, &config, &harness, Path::new("/tmp")); @@ -655,8 +655,8 @@ fn optimize_accepts_improvement() { diff: Some("fake diff".into()), })]); - let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) - .with_measurements(vec![ + let mut env = + 
MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])).with_measurements(vec![ m(&[("time", 10.0)]), // baseline m(&[("time", 8.0)]), // improved ]); @@ -683,8 +683,8 @@ fn optimize_rejects_regression() { diff: Some("bad diff".into()), })]); - let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) - .with_measurements(vec![ + let mut env = + MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])).with_measurements(vec![ m(&[("time", 10.0)]), // baseline m(&[("time", 12.0)]), // regression ]); @@ -739,8 +739,8 @@ fn optimize_maximize_direction() { diff: Some("diff".into()), })]); - let mut env = MockOptimizeEnv::new(d(&[("inlines", Direction::Maximize)])) - .with_measurements(vec![ + let mut env = + MockOptimizeEnv::new(d(&[("inlines", Direction::Maximize)])).with_measurements(vec![ m(&[("inlines", 100.0)]), // baseline m(&[("inlines", 150.0)]), // improvement (higher is better) ]); @@ -763,8 +763,8 @@ fn optimize_maximize_rejects_decrease() { diff: Some("diff".into()), })]); - let mut env = MockOptimizeEnv::new(d(&[("inlines", Direction::Maximize)])) - .with_measurements(vec![ + let mut env = + MockOptimizeEnv::new(d(&[("inlines", Direction::Maximize)])).with_measurements(vec![ m(&[("inlines", 100.0)]), m(&[("inlines", 80.0)]), // regression for Maximize ]); @@ -798,8 +798,8 @@ fn optimize_multi_iteration_progressive_improvement() { }), ]); - let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) - .with_measurements(vec![ + let mut env = + MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])).with_measurements(vec![ m(&[("time", 10.0)]), // baseline m(&[("time", 8.0)]), // iter 1: improvement m(&[("time", 9.0)]), // iter 2: regression from 8.0 @@ -833,10 +833,7 @@ fn optimize_stops_when_agent_produces_no_diff() { ]); let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) - .with_measurements(vec![ - m(&[("time", 10.0)]), - m(&[("time", 9.0)]), - ]); + .with_measurements(vec![m(&[("time", 10.0)]), 
m(&[("time", 9.0)])]); let config = OptimizeConfig { num_iterations: 5, @@ -860,10 +857,7 @@ fn optimize_stops_when_agent_errors() { ]); let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) - .with_measurements(vec![ - m(&[("time", 10.0)]), - m(&[("time", 10.0)]), - ]); + .with_measurements(vec![m(&[("time", 10.0)]), m(&[("time", 10.0)])]); let config = OptimizeConfig { num_iterations: 5, @@ -932,10 +926,7 @@ fn optimize_prompt_includes_measurements_and_hint() { })]); let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) - .with_measurements(vec![ - m(&[("time", 42.0)]), - m(&[("time", 42.0)]), - ]); + .with_measurements(vec![m(&[("time", 42.0)]), m(&[("time", 42.0)])]); let config = OptimizeConfig { num_iterations: 1, @@ -964,8 +955,8 @@ fn optimize_prompt_includes_past_attempts() { }), ]); - let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) - .with_measurements(vec![ + let mut env = + MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])).with_measurements(vec![ m(&[("time", 10.0)]), m(&[("time", 10.0)]), // no improvement m(&[("time", 10.0)]), @@ -995,10 +986,7 @@ fn optimize_diff_is_applied() { })]); let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) - .with_measurements(vec![ - m(&[("time", 10.0)]), - m(&[("time", 10.0)]), - ]); + .with_measurements(vec![m(&[("time", 10.0)]), m(&[("time", 10.0)])]); let config = OptimizeConfig { num_iterations: 1, @@ -1015,9 +1003,18 @@ fn optimize_diff_is_applied() { fn optimize_invariant_failure_mid_sequence() { // 3 iterations: improve, invariant fail, improve let agent = MockAgent::from_responses(vec![ - Ok(AgentResponse { text: "i1".into(), diff: Some("d1".into()) }), - Ok(AgentResponse { text: "i2".into(), diff: Some("d2".into()) }), - Ok(AgentResponse { text: "i3".into(), diff: Some("d3".into()) }), + Ok(AgentResponse { + text: "i1".into(), + diff: Some("d1".into()), + }), + Ok(AgentResponse { + text: "i2".into(), + diff: 
Some("d2".into()), + }), + Ok(AgentResponse { + text: "i3".into(), + diff: Some("d3".into()), + }), ]); let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) diff --git a/jolt-eval/tests/macro_test.rs b/jolt-eval/tests/macro_test.rs index 85d23bb66..a91217e56 100644 --- a/jolt-eval/tests/macro_test.rs +++ b/jolt-eval/tests/macro_test.rs @@ -78,8 +78,14 @@ impl Invariant for BoundsCheckInvariant { fn seed_corpus(&self) -> Vec { vec![ RangeInput { lo: 0, hi: 0 }, - RangeInput { lo: 0, hi: u32::MAX }, - RangeInput { lo: u32::MAX, hi: 0 }, + RangeInput { + lo: 0, + hi: u32::MAX, + }, + RangeInput { + lo: u32::MAX, + hi: 0, + }, RangeInput { lo: 100, hi: 50 }, ] } From 3aa13973940d05336026f557895919f4fe9a8144 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 13:23:03 -0400 Subject: [PATCH 18/86] refactor(jolt-eval): replace proof-level invariants with split-eq polynomial fuzz targets Remove the 6 generic proof-level invariants (soundness, completeness, determinism, serialization_roundtrip, zk_consistency) that required a guest ELF and were slow to exercise. Replace with 2 focused, guest-free invariants that fuzz GruenSplitEqPolynomial::bind against DensePolynomial reference implementations for both LowToHigh and HighToLow binding orders. Also makes JOLT_FUZZ_ELF optional so guest-free invariants work without it, uses DEFAULT_MAX_*_ADVICE_SIZE constants in guest configs, and adds the invariant/objective design spec. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 39 + Cargo.toml | 1 + invariant_spec.md | 250 ++ jolt-eval/Cargo.toml | 1 + jolt-eval/fuzz/Cargo.lock | 2387 +++++++++++++++++ jolt-eval/fuzz/Cargo.toml | 45 +- jolt-eval/fuzz/fuzz_targets/determinism.rs | 6 - .../fuzz/fuzz_targets/prover_completeness.rs | 6 - .../fuzz_targets/serialization_roundtrip.rs | 6 - jolt-eval/fuzz/fuzz_targets/soundness.rs | 6 - .../fuzz_targets/split_eq_bind_high_low.rs | 5 + .../fuzz_targets/split_eq_bind_low_high.rs | 5 + .../fuzz_targets/verifier_completeness.rs | 6 - jolt-eval/fuzz/fuzz_targets/zk_consistency.rs | 6 - jolt-eval/src/guests.rs | 9 +- .../src/invariant/completeness_prover.rs | 109 - .../src/invariant/completeness_verifier.rs | 97 - jolt-eval/src/invariant/determinism.rs | 117 - jolt-eval/src/invariant/mod.rs | 7 +- .../src/invariant/serialization_roundtrip.rs | 101 - jolt-eval/src/invariant/soundness.rs | 135 - jolt-eval/src/invariant/split_eq_bind.rs | 199 ++ jolt-eval/src/invariant/synthesis/fuzz.rs | 53 +- jolt-eval/src/invariant/zk_consistency.rs | 109 - 24 files changed, 2928 insertions(+), 777 deletions(-) create mode 100644 invariant_spec.md create mode 100644 jolt-eval/fuzz/Cargo.lock delete mode 100644 jolt-eval/fuzz/fuzz_targets/determinism.rs delete mode 100644 jolt-eval/fuzz/fuzz_targets/prover_completeness.rs delete mode 100644 jolt-eval/fuzz/fuzz_targets/serialization_roundtrip.rs delete mode 100644 jolt-eval/fuzz/fuzz_targets/soundness.rs create mode 100644 jolt-eval/fuzz/fuzz_targets/split_eq_bind_high_low.rs create mode 100644 jolt-eval/fuzz/fuzz_targets/split_eq_bind_low_high.rs delete mode 100644 jolt-eval/fuzz/fuzz_targets/verifier_completeness.rs delete mode 100644 jolt-eval/fuzz/fuzz_targets/zk_consistency.rs delete mode 100644 jolt-eval/src/invariant/completeness_prover.rs delete mode 100644 jolt-eval/src/invariant/completeness_verifier.rs delete mode 100644 jolt-eval/src/invariant/determinism.rs delete mode 100644 
jolt-eval/src/invariant/serialization_roundtrip.rs delete mode 100644 jolt-eval/src/invariant/soundness.rs create mode 100644 jolt-eval/src/invariant/split_eq_bind.rs delete mode 100644 jolt-eval/src/invariant/zk_consistency.rs diff --git a/Cargo.lock b/Cargo.lock index 6b2c12542..f6f408dd4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2951,12 +2951,16 @@ dependencies = [ "common", "enumset", "eyre", + "inventory", "jolt-core", "jolt-eval-macros", "postcard", "rand 0.8.5", + "rand_chacha 0.3.1", "rayon", + "schemars 0.8.22", "serde", + "serde_json", "sysinfo", "tempfile", "thiserror 2.0.18", @@ -4998,6 +5002,18 @@ dependencies = [ "sdd", ] +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + [[package]] name = "schemars" version = "0.9.0" @@ -5022,6 +5038,18 @@ dependencies = [ "serde_json", ] +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.117", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -5143,6 +5171,17 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "serde_json" version = "1.0.149" diff --git a/Cargo.toml b/Cargo.toml index be20856ee..61cd677e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -95,6 +95,7 @@ members = [ "jolt-eval", "jolt-eval/macros", ] +exclude = ["jolt-eval/fuzz"] [features] host = 
["jolt-sdk/host"] diff --git a/invariant_spec.md b/invariant_spec.md new file mode 100644 index 000000000..f9707d03a --- /dev/null +++ b/invariant_spec.md @@ -0,0 +1,250 @@ +# Invariants and Objectives + +I want to introduce a Rust framework that gives some explicit structure to the "evaluation" part of the intent-execution-evaluation model described [here](https://gist.github.com/moodlezoup/e9f95839d9d848938eb54b662c6c5d25). The motivation is twofold: +1. Maximize agent productivity +2. Minimize the human verification surface + +"Evaluation" should be further broken down into **invariants** and **objectives**. + +**Invariants** are evaluations with a binary outcome, i.e. things that we want to always hold: +- All the tests pass +- No linter warnings/errors +- No unused dependencies + +**Objectives** are evaluations with a numerical outcome, i.e. things we may want to optimize for: +- Peak memory usage +- Runtime +- Code coverage +- Some subjective score of code quality, as judged by AI + +Note that by definition, invariants are a special case of objectives, but it's useful to think of them as separate categories. + +The key property for both invariants and objectives is that they must be **mechanically checkable**. This is important for both of our motivations: it increases agent productivity, by giving the agent a way to check its work without a human in the loop; and it allows the human to gain assurance about the larger codebase while only focusing on a smaller kernel of invariants/objectives.
+ +## Invariants + +Given a single invariant description (a small amount of Rust encoding the invariant), we should be able to mechanically synthesize it into: +- A test, +- A `libfuzzer_sys` fuzz target, +- And/or a "red team" harness for AI agents to try to find a violation of the invariant + - Assuming the invariant and harness are well-written, this should totally eliminate the possibility of false positives + - Should be flexible with respect to the agent setup (which model, how many agents, guiding prompt, etc.) + +In the long-term we should also be able to formally verify certain invariants. + +The invariant description should specify which of the above to generate. The "regular" tests generated from invariants should be run in CI. Fuzzing and AI-driven security reviews can be run at a less-frequent cadence or ad-hoc. + +Pseudocode for an Invariant trait: +```rust + trait Invariant: Send + Sync { + type Setup; + type Input: Arbitrary; + + fn name(&self) -> &str; + /// Used as context for an AI agent trying to violate this invariant + fn description(&self) -> String; + /// What to synthesize from this invariant + fn targets(&self) -> EnumSet // ⊆ {Test, Fuzz, RedTeam} + fn setup(&self) -> Self::Setup; + fn check(&self, setup: &Self::Setup, input: Self::Input) -> Result<(), InvariantViolation>; + /// Returns a seed corpus for tests/fuzzing (known-interesting inputs) + fn seed_corpus(&self) -> Vec { + vec![] + } +} + +``` + +Pseudocode for the AI "red team" harness: +```rust +fn auto_redteam(invariant: Invariant, prompt: String) { + for _ in NUM_ITERATIONS { + // Note: AI should run in an isolated worktree to produce the + // claimed bad input. The invariant is checked in the original + // working tree so the AI cannot cheat. 
+ if let Some(bad_input) = todo!("Tell Claude to find violation of invariant") { + if let Err(e) = invariant.check(bad_input) { + todo!("Log counterexample and error"); + todo!("Tell Claude to summarize how it found the violation"); + break; + } + } else { + todo!("Clean up the worktree, cache description of failed attempt") + } + } +} + +struct InvariantCounterexample { + description: String, + input: I::Input, + error: InvariantViolation, +} + +struct FailedAttempt { + description: String, + approach: String, // What the agent tried + failure_reason: String, // Why it didn't produce a valid counterexample +} +``` + +## Objectives + +The top-level interface for working with objectives should look something like: +```rust +fn measure_objectives(objectives: Vec) -> HashMap; +``` +The function would iterate through the provided objectives and dispatch to their respective `collect_measurement` methods. + +Pseudocode for an Objective trait: +```rust +trait AbstractObjective: Send + Sync { + fn name(&self) -> &str; + fn collect_measurement(&self) -> Result; + /// How many samples to take for statistical significance + fn recommended_samples(&self) -> usize { 1 } + /// What threshold is considered a regression, e.g., 5% slowdown + fn regression_threshold(&self) -> Option { None } + /// Is lower better or higher better? + fn direction(&self) -> Direction; // Minimize or Maximize +} +``` + +Objectives can be used as building blocks for expressive, AI-driven optimization tasks (cf. [autoresearch](https://github.com/karpathy/autoresearch)).
+Pseudocode for a simple optimization harness: +```rust +fn auto_optimize<F: Fn(HashMap) -> f64>(objectives: Vec, objective_function: F, prompt: String) { + let mut baseline = objective_function(measure_objectives(objectives)); + for _ in NUM_ITERATIONS { + todo!("Tell Claude Code to optimize for the given objective function"); + // Can also point Claude to specific functions/snippets to optimize + let new_score = objective_function(measure_objectives(objectives)); + let invariants_hold = check_invariants(); + if invariants_hold && new_score > baseline { + // Successful optimization + baseline = new_score; + todo!("Commit changes for async human review"); + } else { + todo!("Revert changes, cache description of the failed attempt"); + } + } +} + +pub enum Objective { + PeakRss(PeakRssObjective), + ProverTime(ProverTimeObjective), + ProofSize(ProofSizeObjective), + VerifierTime(VerifierTimeObjective), + GuestCycleCount(GuestCycleCountObjective), + // ... +} + +impl Objective { + pub fn collect_measurement(&self) -> Measurement { + match self { + Self::PeakRss(o) => o.collect_measurement(), + Self::ProverTime(o) => o.collect_measurement(), + // ... + } + } +} + +struct OptimizationAttempt { + description: String, // What the agent tried + diff: String, // The actual code change + measurements: HashMap, + invariants_passed: bool +} +``` + +Objectives are ideally reproducible, deterministic, and quick to obtain, though none of these are hard rules –– in particular, performance metrics like runtime inevitably have some variance and may be slow to obtain.
+## Framing tasks in terms of invariants and objectives + +### Implementing a new feature +- Add new invariants to capture the behavior of the feature +- Modify existing invariants as necessary +- The spec for a new feature should clearly document new and modified invariants, as well as expected impact on objectives + - Impact on objectives can be mechanically validated +- Ensure that all invariants hold +### Bug fix +- Add a new invariant (or modify an existing one) to fail without the fix +- Ensure that all other invariants still hold +- Document impact on objectives +### Security review +- Try to find a counterexample to some invariant +### Optimization +- For some function $f(o_1, o_2, \dots, o_n)$ that takes as input the objectives and outputs a single score, maximize the score +- Can apply techniques from multi-objective optimization literature +- Ensure that all invariants still hold +### Refactor +- Special case of optimization, where the objective function captures some notion of code quality + +## As applied to Jolt + +### Example invariants + +- **Soundness**: For a fixed program, input, and honest prover output/proof, the verifier does not accept any other output/proof. +- **(Verifier) Completeness**: For a fixed program, input, and honest prover output/proof, the verifier accepts the honest output/proof. +- **(Prover) Completeness**: For a fixed program, input, and valid size parameters for that program/input pair, the prover should produce a proof (or OOM/timeout). +- **Determinism**: Same program + input → same proof (byte-identical).
+- **Serialization roundtrip**: `deserialize(serialize(proof)) == proof` +### Example objectives + +- Peak RSS (prover memory) +- Prover time +- Proof size +- Verifier time +- Guest cycle counts +- Virtual/inline sequence lengths +- Wrapping cost (Transpiled verifier constraint count) + +### Crate structure + +``` +jolt-eval/ + ├── Cargo.toml + ├── src/ + │ ├── lib.rs # Re-exports, top-level check/measure fns + │ │ + │ ├── invariant/ + │ │ ├── mod.rs # Invariant trait, InvariantViolation, SynthesisTarget, + │ │ │ # FailedAttempt, centralized Invariant enum + │ │ ├── soundness.rs # Soundness invariant (proof mutation) + │ │ ├── completeness_verifier.rs # Verifier completeness (honest proof accepted) + │ │ ├── completeness_prover.rs # Prover completeness (prover doesn't panic) + │ │ ├── determinism.rs # Same input → same proof + │ │ ├── serialization_roundtrip.rs # deserialize(serialize(x)) == x + │ │ ├── zk_consistency.rs # host and host,zk both produce valid proofs + │ │ └── synthesis/ + │ │ ├── mod.rs # Synthesis registry, shared types + │ │ ├── test.rs # #[test] generation from invariants + │ │ ├── fuzz.rs # libfuzzer_sys target generation + │ │ └── redteam.rs # auto_redteam loop, worktree orchestration, + │ │ # InvariantCounterexample, prompt construction + │ │ + │ └── objective/ + │ ├── mod.rs # AbstractObjective trait, Measurement, Unit, Direction, + │ │ # centralized Objective enum, measure_objectives() + │ ├── peak_rss.rs # Peak resident set size + │ ├── prover_time.rs # Wall-clock prover time + │ ├── proof_size.rs # Serialized proof byte length + │ ├── verifier_time.rs # Wall-clock verifier time + │ ├── guest_cycles.rs # Guest instruction cycle count + │ ├── inline_lengths.rs # Virtual/inline sequence lengths + │ ├── wrapping_cost.rs # Transpiled verifier constraint count + │ └── optimize.rs # auto_optimize loop, OptimizationAttempt, + │ # baseline tracking, commit/revert logic + │ + │ + ├── macros/ + │ ├── Cargo.toml # jolt-eval-macros proc-macro crate + │
└── src/ + │ └── lib.rs # #[invariant(targets = [...])] attribute macro + │ + ├── bin/ + │ ├── check_invariants.rs # CLI: run all or selected invariants + │ ├── measure_objectives.rs # CLI: measure all or selected objectives, compare to baseline + │ └── redteam.rs # CLI: --invariant --iterations N --model + │ + └── tests/ + └── integration.rs # Smoke tests for the framework itself +``` diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index 4dd9779d7..6a10ed73a 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -19,6 +19,7 @@ tracing = { workspace = true } clap = { workspace = true, features = ["derive"] } rayon = { workspace = true } rand = { workspace = true } +rand_chacha = { workspace = true } sysinfo = { workspace = true } tracing-subscriber = { workspace = true } diff --git a/jolt-eval/fuzz/Cargo.lock b/jolt-eval/fuzz/Cargo.lock new file mode 100644 index 000000000..d9d37cde4 --- /dev/null +++ b/jolt-eval/fuzz/Cargo.lock @@ -0,0 +1,2387 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59317f77929f0e679d39364702289274de2f0f0b22cbf50b2b8cff2169a0b27a" +dependencies = [ + "cpp_demangle", + "fallible-iterator", + "gimli", + "memmap2", + "object 0.39.0", + "rustc-demangle", + "smallvec", + "typed-arena", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocative" +version = "0.3.4" +source = "git+https://github.com/facebookexperimental/allocative?rev=85b773d85d526d068ce94724ff7a7b81203fc95e#85b773d85d526d068ce94724ff7a7b81203fc95e" +dependencies = [ + "allocative_derive", + "ctor", +] + +[[package]] +name = "allocative_derive" +version = "0.3.3" +source = "git+https://github.com/facebookexperimental/allocative?rev=85b773d85d526d068ce94724ff7a7b81203fc95e#85b773d85d526d068ce94724ff7a7b81203fc95e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + +[[package]] +name = "ark-bn254" +version = "0.5.0" +source = 
"git+https://github.com/a16z/arkworks-algebra?branch=dev%2Ftwist-shout#76bb3a4518928f1ff7f15875f940d614bb9845e6" +dependencies = [ + "ark-ec", + "ark-ff", + "ark-serialize", + "ark-std", +] + +[[package]] +name = "ark-ec" +version = "0.5.0" +source = "git+https://github.com/a16z/arkworks-algebra?branch=dev%2Ftwist-shout#76bb3a4518928f1ff7f15875f940d614bb9845e6" +dependencies = [ + "ahash", + "ark-ff", + "ark-poly", + "ark-serialize", + "ark-std", + "educe", + "fnv", + "hashbrown 0.15.5", + "itertools 0.13.0", + "num-bigint", + "num-integer", + "num-traits", + "rayon", + "zeroize", +] + +[[package]] +name = "ark-ff" +version = "0.5.0" +source = "git+https://github.com/a16z/arkworks-algebra?branch=dev%2Ftwist-shout#76bb3a4518928f1ff7f15875f940d614bb9845e6" +dependencies = [ + "allocative", + "ark-ff-asm", + "ark-ff-macros", + "ark-serialize", + "ark-std", + "arrayvec", + "digest", + "educe", + "itertools 0.13.0", + "num-bigint", + "num-traits", + "paste", + "rayon", + "zeroize", +] + +[[package]] +name = "ark-ff-asm" +version = "0.5.0" +source = "git+https://github.com/a16z/arkworks-algebra?branch=dev%2Ftwist-shout#76bb3a4518928f1ff7f15875f940d614bb9845e6" +dependencies = [ + "quote", + "syn 2.0.117", +] + +[[package]] +name = "ark-ff-macros" +version = "0.5.0" +source = "git+https://github.com/a16z/arkworks-algebra?branch=dev%2Ftwist-shout#76bb3a4518928f1ff7f15875f940d614bb9845e6" +dependencies = [ + "num-bigint", + "num-traits", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "ark-poly" +version = "0.5.0" +source = "git+https://github.com/a16z/arkworks-algebra?branch=dev%2Ftwist-shout#76bb3a4518928f1ff7f15875f940d614bb9845e6" +dependencies = [ + "ahash", + "ark-ff", + "ark-serialize", + "ark-std", + "educe", + "fnv", + "hashbrown 0.15.5", +] + +[[package]] +name = "ark-serialize" +version = "0.5.0" +source = "git+https://github.com/a16z/arkworks-algebra?branch=dev%2Ftwist-shout#76bb3a4518928f1ff7f15875f940d614bb9845e6" +dependencies = [ + 
"ark-serialize-derive", + "ark-std", + "arrayvec", + "digest", + "num-bigint", + "rayon", +] + +[[package]] +name = "ark-serialize-derive" +version = "0.5.0" +source = "git+https://github.com/a16z/arkworks-algebra?branch=dev%2Ftwist-shout#76bb3a4518928f1ff7f15875f940d614bb9845e6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "ark-std" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "246a225cc6131e9ee4f24619af0f19d67761fff15d7ccc22e42b80846e69449a" +dependencies = [ + "num-traits", + "rand", + "rayon", +] + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bincode" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" +dependencies = [ + "bincode_derive", + "serde", + "unty", +] + +[[package]] +name = "bincode_derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" +dependencies = [ + "virtue", +] + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "blake2" +version = "0.10.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "cc" +version = "1.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "cobs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" +dependencies = [ + "thiserror", +] + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "common" +version = "0.2.0" +dependencies = [ + "allocative", + "ark-serialize", + "serde", + "syn 2.0.117", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpp_demangle" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0667304c32ea56cb4cd6d2d7c0cfe9a2f8041229db8c033af7f8d69492429def" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "ctor" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" +dependencies = [ + "quote", + "syn 1.0.109", +] + +[[package]] +name = "darling" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "darling_macro" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.117", +] + +[[package]] +name = 
"derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "derive_more" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "syn 2.0.117", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "dory-derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc9c63de9e3d87d5be179ce2ddde4f31d95c12c1f20ccdbc3a70b004813959ca" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "dory-pcs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8c58baea9f0ed973489cd1981b0e6a8c91aafddb05e3903b1dd54175ddcb52d" +dependencies = [ + "ark-bn254", + "ark-ec", + "ark-ff", + "ark-serialize", + "ark-std", + "bincode 1.3.3", + "blake2", + "digest", + "dory-derive", + "rand_core", + "rayon", + "serde", + "thiserror", + "tracing", +] + +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + +[[package]] +name = "educe" +version = "0.6.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417" +dependencies = [ + "enum-ordinalize", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + +[[package]] +name = "enum-ordinalize" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1091a7bb1f8f2c4b28f1fe2cef4980ca2d410a3d727d67ecc3178c9b0800f0" +dependencies = [ + "enum-ordinalize-derive", +] + +[[package]] +name = "enum-ordinalize-derive" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "enumset" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25b07a8dfbbbfc0064c0a6bdf9edcf966de6b1c33ce344bdeca3b41615452634" +dependencies = [ + "enumset_derive", +] + +[[package]] +name = "enumset_derive" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43e744e4ea338060faee68ed933e46e722fb7f3617e722a5772d7e856d8b3ce" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "eyre" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd915d99f24784cdc19fd37ef22b97e3ff0ae756c7e492e9fbfe897d61e2aec" +dependencies = [ + "indenter", + "once_cell", +] + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi 5.3.0", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "gimli" +version = "0.33.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e16c5073773ccf057c282be832a59ee53ef5ff98db3aeff7f8314f52ffc196" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "foldhash 0.1.5", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "foldhash 0.2.0", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "indenter" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "964de6e86d545b246d84badc0fef527924ace5134f30641c203ef52ba83f58d5" + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "inventory" +version = "0.3.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a4f0c30c76f2f4ccee3fe55a2435f691ca00c0e4bd87abe4f4a851b1d4dac39b" +dependencies = [ + "rustversion", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "jolt-core" +version = "0.1.0" +dependencies = [ + "allocative", + "ark-bn254", + "ark-ec", + "ark-ff", + "ark-serialize", + "ark-std", + "bincode 2.0.1", + "blake2", + "chrono", + "clap", + "common", + "derive_more", + "dory-pcs", + "eyre", + "fixedbitset", + "itertools 0.14.0", + "jolt-inlines-keccak256", + "jolt-inlines-sha2", + "jolt-optimizations", + "memory-stats", + "num", + "num-derive", + "num-traits", + "postcard", + "rand", + "rand_chacha", + "rand_core", + "rayon", + "serde", + "sha3", + "strum", + "strum_macros", + "thiserror", + "tracer", + "tracing", + "tracing-chrome", + "tracing-subscriber", +] + +[[package]] +name = "jolt-eval" +version = "0.1.0" +dependencies = [ + "arbitrary", + "ark-bn254", + "ark-serialize", + 
"clap", + "common", + "enumset", + "eyre", + "inventory", + "jolt-core", + "jolt-eval-macros", + "postcard", + "rand", + "rayon", + "schemars", + "serde", + "serde_json", + "sysinfo", + "tempfile", + "thiserror", + "tracer", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "jolt-eval-fuzz" +version = "0.0.0" +dependencies = [ + "jolt-eval", + "libfuzzer-sys", +] + +[[package]] +name = "jolt-eval-macros" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "jolt-inlines-keccak256" +version = "0.1.0" +dependencies = [ + "jolt-inlines-sdk", +] + +[[package]] +name = "jolt-inlines-sdk" +version = "0.1.0" +dependencies = [ + "inventory", + "jolt-platform", + "num-bigint", + "tracer", +] + +[[package]] +name = "jolt-inlines-sha2" +version = "0.1.0" +dependencies = [ + "jolt-inlines-sdk", +] + +[[package]] +name = "jolt-optimizations" +version = "0.5.0" +source = "git+https://github.com/a16z/arkworks-algebra?branch=dev%2Ftwist-shout#76bb3a4518928f1ff7f15875f940d614bb9845e6" +dependencies = [ + "ark-bn254", + "ark-ec", + "ark-ff", + "ark-serialize", + "ark-std", + "arrayvec", + "num-bigint", + "num-integer", + "num-rational", + "num-traits", + "rayon", +] + +[[package]] +name = "jolt-platform" +version = "0.1.0" +dependencies = [ + "getrandom 0.2.17", + "getrandom 0.3.4", + "rand", +] + +[[package]] +name = "js-sys" +version = "0.3.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e04e2ef80ce82e13552136fabeef8a5ed1f985a96805761cbb9a2c34e7664d9" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "keccak" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb26cec98cce3a3d96cbb7bced3c4b16e3d13f27ec56dbd62cbc8f39cfb9d653" +dependencies = [ + "cpufeatures", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.184" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" + +[[package]] +name = "libfuzzer-sys" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f12a681b7dd8ce12bff52488013ba614b869148d54dd79836ab85aafdd53f08d" +dependencies = [ + "arbitrary", + "cc", +] + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "memmap2" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +dependencies = [ + "libc", +] + +[[package]] +name = "memory-stats" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c73f5c649995a115e1a0220b35e4df0a1294500477f97a91d0660fb5abeb574a" +dependencies = [ + 
"libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "ntapi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-derive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags", +] + +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" +dependencies = [ + "libc", + "objc2-core-foundation", +] + +[[package]] +name = "object" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271638cd5fa9cca89c4c304675ca658efc4e64a66c716b7cfe1afb4b9611dbbc" +dependencies = [ + "crc32fast", + "hashbrown 0.16.1", + "indexmap", + "memchr", +] + +[[package]] +name = "object" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63944c133d03f44e75866bbd160b95af0ec3f6a13d936d69d31c81078cbc5baf" +dependencies = [ + "flate2", + "memchr", + "ruzstd", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "postcard" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "serde", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + 
"proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = 
"0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rustc-demangle" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ruzstd" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5ff0cc5e135c8870a775d3320910cd9b564ec036b4dc0b8741629020be63f01" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.117", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "sha3" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60" +dependencies = [ + "digest", + "keccak", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "strum" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9628de9b8791db39ceda2b119bbe13134770b56c138ec1d3af810d045c04f9bd" + +[[package]] +name = "strum_macros" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab85eea0270ee17587ed4156089e10b9e6880ee688791d45a905f5b1ca36f664" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sysinfo" +version = "0.38.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ab6a2f8bfe508deb3c6406578252e491d299cbbf3bc0529ecc3313aee4a52f" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tracer" +version = "0.2.0" +dependencies = [ + "addr2line", + "ark-serialize", + "clap", + "common", + "derive_more", + "fnv", + "inventory", + "itertools 0.14.0", + "jolt-platform", + "object 0.38.1", + "paste", + "postcard", + "serde", + "serde_json", + "strum", + "strum_macros", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tracing-chrome" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf0a738ed5d6450a9fb96e86a23ad808de2b727fd1394585da5cdd6788ffe724" +dependencies = [ + "serde_json", + "tracing-core", + "tracing-subscriber", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "unty" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "virtue" +version = "0.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0551fc1bb415591e3372d0bc4780db7e587d84e2a7e79da121051c5c4b89d0b0" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fbdf9a35adf44786aecd5ff89b4563a90325f9da0923236f6104e603c7e86be" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dca9693ef2bab6d4e6707234500350d8dad079eb508dca05530c85dc3a529ff2" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.117", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39129a682a6d2d841b6c429d0c51e5cb0ed1a03829d8b3d1e69a011e62cb3d3b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" +dependencies = [ + "windows-collections", + "windows-core", + "windows-future", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" +dependencies = [ + "windows-core", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + 
"windows-result", + "windows-strings", +] + +[[package]] +name = "windows-future" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" +dependencies = [ + "windows-core", + "windows-link", + "windows-threading", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-numerics" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" +dependencies = [ + "windows-core", + "windows-link", +] + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows-threading" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zerocopy" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = "zeroize_derive" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/jolt-eval/fuzz/Cargo.toml b/jolt-eval/fuzz/Cargo.toml index 
a0ba8125b..af96ee259 100644 --- a/jolt-eval/fuzz/Cargo.toml +++ b/jolt-eval/fuzz/Cargo.toml @@ -4,6 +4,15 @@ version = "0.0.0" publish = false edition = "2021" +[workspace] + +[patch.crates-io] +ark-bn254 = { git = "https://github.com/a16z/arkworks-algebra", branch = "dev/twist-shout" } +ark-ff = { git = "https://github.com/a16z/arkworks-algebra", branch = "dev/twist-shout" } +ark-ec = { git = "https://github.com/a16z/arkworks-algebra", branch = "dev/twist-shout" } +ark-serialize = { git = "https://github.com/a16z/arkworks-algebra", branch = "dev/twist-shout" } +allocative = { git = "https://github.com/facebookexperimental/allocative", rev = "85b773d85d526d068ce94724ff7a7b81203fc95e" } + [package.metadata] cargo-fuzz = true @@ -12,43 +21,15 @@ libfuzzer-sys = "0.4" jolt-eval = { path = ".." } [[bin]] -name = "soundness" -path = "fuzz_targets/soundness.rs" -test = false -doc = false -bench = false - -[[bin]] -name = "verifier_completeness" -path = "fuzz_targets/verifier_completeness.rs" -test = false -doc = false -bench = false - -[[bin]] -name = "prover_completeness" -path = "fuzz_targets/prover_completeness.rs" -test = false -doc = false -bench = false - -[[bin]] -name = "determinism" -path = "fuzz_targets/determinism.rs" -test = false -doc = false -bench = false - -[[bin]] -name = "serialization_roundtrip" -path = "fuzz_targets/serialization_roundtrip.rs" +name = "split_eq_bind_low_high" +path = "fuzz_targets/split_eq_bind_low_high.rs" test = false doc = false bench = false [[bin]] -name = "zk_consistency" -path = "fuzz_targets/zk_consistency.rs" +name = "split_eq_bind_high_low" +path = "fuzz_targets/split_eq_bind_high_low.rs" test = false doc = false bench = false diff --git a/jolt-eval/fuzz/fuzz_targets/determinism.rs b/jolt-eval/fuzz/fuzz_targets/determinism.rs deleted file mode 100644 index 52f5d7e74..000000000 --- a/jolt-eval/fuzz/fuzz_targets/determinism.rs +++ /dev/null @@ -1,6 +0,0 @@ -#![no_main] -use libfuzzer_sys::fuzz_target; - -fuzz_target!(|data: 
&[u8]| { - jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("determinism", data); -}); diff --git a/jolt-eval/fuzz/fuzz_targets/prover_completeness.rs b/jolt-eval/fuzz/fuzz_targets/prover_completeness.rs deleted file mode 100644 index 11022c0f6..000000000 --- a/jolt-eval/fuzz/fuzz_targets/prover_completeness.rs +++ /dev/null @@ -1,6 +0,0 @@ -#![no_main] -use libfuzzer_sys::fuzz_target; - -fuzz_target!(|data: &[u8]| { - jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("prover_completeness", data); -}); diff --git a/jolt-eval/fuzz/fuzz_targets/serialization_roundtrip.rs b/jolt-eval/fuzz/fuzz_targets/serialization_roundtrip.rs deleted file mode 100644 index b7c0c3a1d..000000000 --- a/jolt-eval/fuzz/fuzz_targets/serialization_roundtrip.rs +++ /dev/null @@ -1,6 +0,0 @@ -#![no_main] -use libfuzzer_sys::fuzz_target; - -fuzz_target!(|data: &[u8]| { - jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("serialization_roundtrip", data); -}); diff --git a/jolt-eval/fuzz/fuzz_targets/soundness.rs b/jolt-eval/fuzz/fuzz_targets/soundness.rs deleted file mode 100644 index 72c1a45af..000000000 --- a/jolt-eval/fuzz/fuzz_targets/soundness.rs +++ /dev/null @@ -1,6 +0,0 @@ -#![no_main] -use libfuzzer_sys::fuzz_target; - -fuzz_target!(|data: &[u8]| { - jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("soundness", data); -}); diff --git a/jolt-eval/fuzz/fuzz_targets/split_eq_bind_high_low.rs b/jolt-eval/fuzz/fuzz_targets/split_eq_bind_high_low.rs new file mode 100644 index 000000000..0dad467d6 --- /dev/null +++ b/jolt-eval/fuzz/fuzz_targets/split_eq_bind_high_low.rs @@ -0,0 +1,5 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +fuzz_target!(|data: &[u8]| { + jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("split_eq_bind_high_low", data); +}); diff --git a/jolt-eval/fuzz/fuzz_targets/split_eq_bind_low_high.rs b/jolt-eval/fuzz/fuzz_targets/split_eq_bind_low_high.rs new file mode 100644 index 000000000..e55116849 --- /dev/null +++ 
b/jolt-eval/fuzz/fuzz_targets/split_eq_bind_low_high.rs @@ -0,0 +1,5 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +fuzz_target!(|data: &[u8]| { + jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("split_eq_bind_low_high", data); +}); diff --git a/jolt-eval/fuzz/fuzz_targets/verifier_completeness.rs b/jolt-eval/fuzz/fuzz_targets/verifier_completeness.rs deleted file mode 100644 index 589fa34c6..000000000 --- a/jolt-eval/fuzz/fuzz_targets/verifier_completeness.rs +++ /dev/null @@ -1,6 +0,0 @@ -#![no_main] -use libfuzzer_sys::fuzz_target; - -fuzz_target!(|data: &[u8]| { - jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("verifier_completeness", data); -}); diff --git a/jolt-eval/fuzz/fuzz_targets/zk_consistency.rs b/jolt-eval/fuzz/fuzz_targets/zk_consistency.rs deleted file mode 100644 index 817641064..000000000 --- a/jolt-eval/fuzz/fuzz_targets/zk_consistency.rs +++ /dev/null @@ -1,6 +0,0 @@ -#![no_main] -use libfuzzer_sys::fuzz_target; - -fuzz_target!(|data: &[u8]| { - jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("zk_consistency", data); -}); diff --git a/jolt-eval/src/guests.rs b/jolt-eval/src/guests.rs index 3f496ce14..5f5ab86ea 100644 --- a/jolt-eval/src/guests.rs +++ b/jolt-eval/src/guests.rs @@ -1,5 +1,6 @@ use std::sync::Arc; +use common::constants::{DEFAULT_MAX_TRUSTED_ADVICE_SIZE, DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE}; use common::jolt_device::MemoryConfig; use crate::TestCase; @@ -24,8 +25,8 @@ impl GuestSpec { MemoryConfig { max_input_size: self.max_input_size, max_output_size: self.max_output_size, - max_untrusted_advice_size: 0, - max_trusted_advice_size: 0, + max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, + max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE, stack_size: self.stack_size, heap_size: self.heap_size, program_size: None, @@ -163,8 +164,8 @@ pub fn resolve_test_case( memory_config: MemoryConfig { max_input_size: 4096, max_output_size: 4096, - max_untrusted_advice_size: 0, - max_trusted_advice_size: 0, + 
max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, + max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE, stack_size: 65536, heap_size: 32768, program_size: None, diff --git a/jolt-eval/src/invariant/completeness_prover.rs b/jolt-eval/src/invariant/completeness_prover.rs deleted file mode 100644 index 39acc4f82..000000000 --- a/jolt-eval/src/invariant/completeness_prover.rs +++ /dev/null @@ -1,109 +0,0 @@ -use std::sync::Arc; - -use arbitrary::Arbitrary; -use enumset::EnumSet; - -use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; -use crate::{ProverPreprocessing, TestCase}; - -inventory::submit! { - InvariantEntry { - name: "prover_completeness", - targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, - needs_guest: true, - build: |tc, _inputs| Box::new(ProverCompletenessInvariant::new(tc.unwrap())), - } -} - -/// Prover completeness: for a fixed program, input, and valid size parameters, -/// the prover should produce a proof without panicking. -pub struct ProverCompletenessInvariant { - pub test_case: Arc, -} - -pub struct ProverCompletenessSetup { - test_case: Arc, - prover_preprocessing: ProverPreprocessing, -} - -/// Program inputs for prover completeness testing. -#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] -pub struct ProverInputs { - pub data: Vec, -} - -impl ProverCompletenessInvariant { - pub fn new(test_case: Arc) -> Self { - Self { test_case } - } -} - -impl Invariant for ProverCompletenessInvariant { - type Setup = ProverCompletenessSetup; - type Input = ProverInputs; - - fn name(&self) -> &str { - "prover_completeness" - } - - fn description(&self) -> String { - "For a fixed program, input, and valid size parameters, \ - the prover should produce a proof (not panic)." 
- .to_string() - } - - fn targets(&self) -> EnumSet { - SynthesisTarget::Test | SynthesisTarget::Fuzz - } - - fn setup(&self) -> Self::Setup { - let prover_pp = self.test_case.prover_preprocessing(); - ProverCompletenessSetup { - test_case: Arc::clone(&self.test_case), - prover_preprocessing: prover_pp, - } - } - - fn check(&self, setup: &Self::Setup, input: ProverInputs) -> Result<(), InvariantViolation> { - let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { - setup - .test_case - .prove(&setup.prover_preprocessing, &input.data) - })); - - match result { - Ok((_proof, io_device)) => { - // Guest panics are acceptable (the guest may reject bad input). - // Prover panics are not -- those are caught by catch_unwind above. - if io_device.panic { - // Guest panicked, but prover completed successfully - Ok(()) - } else { - Ok(()) - } - } - Err(panic_info) => { - let msg = if let Some(s) = panic_info.downcast_ref::() { - s.clone() - } else if let Some(s) = panic_info.downcast_ref::<&str>() { - s.to_string() - } else { - "unknown panic".to_string() - }; - Err(InvariantViolation::with_details( - "Prover panicked", - format!("inputs: {} bytes, panic: {msg}", input.data.len()), - )) - } - } - } - - fn seed_corpus(&self) -> Vec { - vec![ - ProverInputs { data: vec![] }, - ProverInputs { - data: vec![0u8; 64], - }, - ] - } -} diff --git a/jolt-eval/src/invariant/completeness_verifier.rs b/jolt-eval/src/invariant/completeness_verifier.rs deleted file mode 100644 index 89df6f626..000000000 --- a/jolt-eval/src/invariant/completeness_verifier.rs +++ /dev/null @@ -1,97 +0,0 @@ -use std::sync::Arc; - -use arbitrary::Arbitrary; -use enumset::EnumSet; - -use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; -use crate::{ProverPreprocessing, TestCase, VerifierPreprocessing}; - -inventory::submit! 
{ - InvariantEntry { - name: "verifier_completeness", - targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, - needs_guest: true, - build: |tc, _inputs| Box::new(VerifierCompletenessInvariant::new(tc.unwrap())), - } -} - -/// Verifier completeness: for a fixed program and honest prover output/proof, -/// the verifier accepts the honest output/proof. -pub struct VerifierCompletenessInvariant { - pub test_case: Arc, -} - -/// Pre-computed preprocessing shared across checks. -pub struct VerifierCompletenessSetup { - test_case: Arc, - prover_preprocessing: ProverPreprocessing, - verifier_preprocessing: VerifierPreprocessing, -} - -/// Program inputs for completeness testing. -#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] -pub struct ProgramInputs { - pub data: Vec, -} - -impl VerifierCompletenessInvariant { - pub fn new(test_case: Arc) -> Self { - Self { test_case } - } -} - -impl Invariant for VerifierCompletenessInvariant { - type Setup = VerifierCompletenessSetup; - type Input = ProgramInputs; - - fn name(&self) -> &str { - "verifier_completeness" - } - - fn description(&self) -> String { - "For a fixed program, input, and honest prover output/proof, \ - the verifier accepts the honest output/proof." 
- .to_string() - } - - fn targets(&self) -> EnumSet { - SynthesisTarget::Test | SynthesisTarget::Fuzz - } - - fn setup(&self) -> Self::Setup { - let prover_pp = self.test_case.prover_preprocessing(); - let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); - VerifierCompletenessSetup { - test_case: Arc::clone(&self.test_case), - prover_preprocessing: prover_pp, - verifier_preprocessing: verifier_pp, - } - } - - fn check(&self, setup: &Self::Setup, input: ProgramInputs) -> Result<(), InvariantViolation> { - let (proof, io_device) = setup - .test_case - .prove(&setup.prover_preprocessing, &input.data); - - // If the guest panicked, skip -- we only care about non-panicking executions - if io_device.panic { - return Ok(()); - } - - TestCase::verify(&setup.verifier_preprocessing, proof, &io_device).map_err(|e| { - InvariantViolation::with_details( - "Verifier rejected honest proof", - format!("inputs: {} bytes, error: {e}", input.data.len()), - ) - }) - } - - fn seed_corpus(&self) -> Vec { - vec![ - ProgramInputs { data: vec![] }, - ProgramInputs { - data: vec![0u8; 32], - }, - ] - } -} diff --git a/jolt-eval/src/invariant/determinism.rs b/jolt-eval/src/invariant/determinism.rs deleted file mode 100644 index 993ebb140..000000000 --- a/jolt-eval/src/invariant/determinism.rs +++ /dev/null @@ -1,117 +0,0 @@ -use std::sync::Arc; - -use arbitrary::Arbitrary; -use enumset::EnumSet; - -use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; -use crate::{serialize_proof, ProverPreprocessing, TestCase}; - -inventory::submit! { - InvariantEntry { - name: "determinism", - targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, - needs_guest: true, - build: |tc, _inputs| Box::new(DeterminismInvariant::new(tc.unwrap())), - } -} - -/// Determinism invariant: same program + input must produce byte-identical proofs. 
-pub struct DeterminismInvariant { - pub test_case: Arc, -} - -pub struct DeterminismSetup { - test_case: Arc, - prover_preprocessing: ProverPreprocessing, -} - -/// Program inputs for determinism testing. -#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] -pub struct DeterminismInputs { - pub data: Vec, -} - -impl DeterminismInvariant { - pub fn new(test_case: Arc) -> Self { - Self { test_case } - } -} - -impl Invariant for DeterminismInvariant { - type Setup = DeterminismSetup; - type Input = DeterminismInputs; - - fn name(&self) -> &str { - "determinism" - } - - fn description(&self) -> String { - "Same program + input must produce the same proof (byte-identical).".to_string() - } - - fn targets(&self) -> EnumSet { - SynthesisTarget::Test | SynthesisTarget::Fuzz - } - - fn setup(&self) -> Self::Setup { - let prover_pp = self.test_case.prover_preprocessing(); - DeterminismSetup { - test_case: Arc::clone(&self.test_case), - prover_preprocessing: prover_pp, - } - } - - fn check( - &self, - setup: &Self::Setup, - input: DeterminismInputs, - ) -> Result<(), InvariantViolation> { - let (proof1, io1) = setup - .test_case - .prove(&setup.prover_preprocessing, &input.data); - let (proof2, io2) = setup - .test_case - .prove(&setup.prover_preprocessing, &input.data); - - let bytes1 = serialize_proof(&proof1); - let bytes2 = serialize_proof(&proof2); - - if bytes1 != bytes2 { - // Find first differing byte - let first_diff = bytes1 - .iter() - .zip(bytes2.iter()) - .position(|(a, b)| a != b) - .unwrap_or(bytes1.len().min(bytes2.len())); - - return Err(InvariantViolation::with_details( - "Non-deterministic proof generation", - format!( - "proofs differ at byte {first_diff} (len1={}, len2={})", - bytes1.len(), - bytes2.len() - ), - )); - } - - // Also check that I/O is deterministic - if io1.outputs != io2.outputs { - return Err(InvariantViolation::new("Non-deterministic program outputs")); - } - - if io1.panic != io2.panic { - 
return Err(InvariantViolation::new("Non-deterministic panic behavior")); - } - - Ok(()) - } - - fn seed_corpus(&self) -> Vec { - vec![ - DeterminismInputs { data: vec![] }, - DeterminismInputs { - data: vec![1, 2, 3, 4], - }, - ] - } -} diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index 277ec76f8..477314aa0 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -1,10 +1,5 @@ -pub mod completeness_prover; -pub mod completeness_verifier; -pub mod determinism; -pub mod serialization_roundtrip; -pub mod soundness; +pub mod split_eq_bind; pub mod synthesis; -pub mod zk_consistency; use std::any::Any; use std::fmt; diff --git a/jolt-eval/src/invariant/serialization_roundtrip.rs b/jolt-eval/src/invariant/serialization_roundtrip.rs deleted file mode 100644 index 9210e06fa..000000000 --- a/jolt-eval/src/invariant/serialization_roundtrip.rs +++ /dev/null @@ -1,101 +0,0 @@ -use std::sync::Arc; - -use arbitrary::Arbitrary; -use enumset::EnumSet; - -use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; -use crate::{deserialize_proof, serialize_proof, TestCase}; - -inventory::submit! { - InvariantEntry { - name: "serialization_roundtrip", - targets: || { SynthesisTarget::Test.into() }, - needs_guest: true, - build: |tc, inputs| Box::new(SerializationRoundtripInvariant::new(tc.unwrap(), inputs)), - } -} - -/// Serialization roundtrip invariant: `deserialize(serialize(proof)) == proof`, -/// verified by checking that re-serialization produces identical bytes. -pub struct SerializationRoundtripInvariant { - pub test_case: Arc, - pub default_inputs: Vec, -} - -pub struct SerializationRoundtripSetup { - proof_bytes: Vec, -} - -/// Unit input -- the roundtrip check has no variable input beyond the -/// proof generated during setup. 
-#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] -pub struct RoundtripInput { - _dummy: u8, -} - -impl SerializationRoundtripInvariant { - pub fn new(test_case: Arc, default_inputs: Vec) -> Self { - Self { - test_case, - default_inputs, - } - } -} - -impl Invariant for SerializationRoundtripInvariant { - type Setup = SerializationRoundtripSetup; - type Input = RoundtripInput; - - fn name(&self) -> &str { - "serialization_roundtrip" - } - - fn description(&self) -> String { - "deserialize(serialize(proof)) == proof, verified via byte-identical \ - re-serialization." - .to_string() - } - - fn targets(&self) -> EnumSet { - SynthesisTarget::Test.into() - } - - fn setup(&self) -> Self::Setup { - let prover_pp = self.test_case.prover_preprocessing(); - let (proof, _io) = self.test_case.prove(&prover_pp, &self.default_inputs); - let proof_bytes = serialize_proof(&proof); - SerializationRoundtripSetup { proof_bytes } - } - - fn check(&self, setup: &Self::Setup, _input: RoundtripInput) -> Result<(), InvariantViolation> { - let deserialized = deserialize_proof(&setup.proof_bytes).map_err(|e| { - InvariantViolation::with_details("Deserialization failed", e.to_string()) - })?; - - let reserialized = serialize_proof(&deserialized); - - if setup.proof_bytes != reserialized { - let first_diff = setup - .proof_bytes - .iter() - .zip(reserialized.iter()) - .position(|(a, b)| a != b) - .unwrap_or(setup.proof_bytes.len().min(reserialized.len())); - - Err(InvariantViolation::with_details( - "Serialization roundtrip mismatch", - format!( - "bytes differ at offset {first_diff} (original={}, roundtripped={})", - setup.proof_bytes.len(), - reserialized.len() - ), - )) - } else { - Ok(()) - } - } - - fn seed_corpus(&self) -> Vec { - vec![RoundtripInput { _dummy: 0 }] - } -} diff --git a/jolt-eval/src/invariant/soundness.rs b/jolt-eval/src/invariant/soundness.rs deleted file mode 100644 index 80e7eb7d3..000000000 --- 
a/jolt-eval/src/invariant/soundness.rs +++ /dev/null @@ -1,135 +0,0 @@ -use std::sync::Arc; - -use arbitrary::Arbitrary; -use enumset::EnumSet; - -use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; -use crate::{serialize_proof, JoltDevice, Proof, TestCase, VerifierPreprocessing}; - -inventory::submit! { - InvariantEntry { - name: "soundness", - targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz | SynthesisTarget::RedTeam, - needs_guest: true, - build: |tc, inputs| Box::new(SoundnessInvariant::new(tc.unwrap(), inputs)), - } -} - -/// Mutation applied to a serialized proof to test soundness. -#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] -pub struct ProofMutation { - pub byte_index: usize, - pub new_value: u8, -} - -/// Pre-computed honest proof and verification data. -pub struct SoundnessSetup { - proof_bytes: Vec, - io_device: JoltDevice, - verifier_preprocessing: VerifierPreprocessing, -} - -/// Soundness invariant: for a fixed program and honest prover output/proof, -/// the verifier must reject any mutated (different) proof. -pub struct SoundnessInvariant { - pub test_case: Arc, - pub default_inputs: Vec, -} - -impl SoundnessInvariant { - pub fn new(test_case: Arc, default_inputs: Vec) -> Self { - Self { - test_case, - default_inputs, - } - } -} - -impl Invariant for SoundnessInvariant { - type Setup = SoundnessSetup; - type Input = ProofMutation; - - fn name(&self) -> &str { - "soundness" - } - - fn description(&self) -> String { - "For a fixed program, input, and honest prover output/proof, \ - the verifier does not accept for any other output/proof." 
- .to_string() - } - - fn targets(&self) -> EnumSet { - SynthesisTarget::Test | SynthesisTarget::Fuzz | SynthesisTarget::RedTeam - } - - fn setup(&self) -> Self::Setup { - let prover_pp = self.test_case.prover_preprocessing(); - let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); - let (proof, io_device) = self.test_case.prove(&prover_pp, &self.default_inputs); - let proof_bytes = serialize_proof(&proof); - SoundnessSetup { - proof_bytes, - io_device, - verifier_preprocessing: verifier_pp, - } - } - - fn check(&self, setup: &Self::Setup, input: ProofMutation) -> Result<(), InvariantViolation> { - if setup.proof_bytes.is_empty() { - return Ok(()); - } - - let idx = input.byte_index % setup.proof_bytes.len(); - - // Skip no-op mutations - if setup.proof_bytes[idx] == input.new_value { - return Ok(()); - } - - let mut mutated = setup.proof_bytes.clone(); - mutated[idx] = input.new_value; - - // If deserialization fails, the mutation was caught - let mutated_proof: Proof = match crate::deserialize_proof(&mutated) { - Ok(p) => p, - Err(_) => return Ok(()), - }; - - // Verification of a mutated proof must fail - match TestCase::verify( - &setup.verifier_preprocessing, - mutated_proof, - &setup.io_device, - ) { - Ok(()) => Err(InvariantViolation::with_details( - "Verifier accepted mutated proof", - format!( - "mutation at byte {idx}: 0x{:02x} -> 0x{:02x}", - setup.proof_bytes[idx], input.new_value - ), - )), - Err(_) => Ok(()), - } - } - - fn seed_corpus(&self) -> Vec { - vec![ - // Mutate first byte - ProofMutation { - byte_index: 0, - new_value: 0xFF, - }, - // Mutate a byte in the middle - ProofMutation { - byte_index: 1000, - new_value: 0x00, - }, - // Flip a single bit - ProofMutation { - byte_index: 42, - new_value: 0x01, - }, - ] - } -} diff --git a/jolt-eval/src/invariant/split_eq_bind.rs b/jolt-eval/src/invariant/split_eq_bind.rs new file mode 100644 index 000000000..156930066 --- /dev/null +++ b/jolt-eval/src/invariant/split_eq_bind.rs @@ -0,0 +1,199 
@@ +#![allow(non_snake_case)] + +use arbitrary::Arbitrary; +use enumset::EnumSet; + +use ark_bn254::Fr; +use jolt_core::field::JoltField; +use jolt_core::poly::dense_mlpoly::DensePolynomial; +use jolt_core::poly::eq_poly::EqPolynomial; +use jolt_core::poly::multilinear_polynomial::BindingOrder; +use jolt_core::poly::split_eq_poly::GruenSplitEqPolynomial; + +use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; + +type Challenge = ::Challenge; + +inventory::submit! { + InvariantEntry { + name: "split_eq_bind_low_high", + targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, + needs_guest: false, + build: |_tc, _inputs| Box::new(SplitEqBindLowHighInvariant), + } +} + +inventory::submit! { + InvariantEntry { + name: "split_eq_bind_high_low", + targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, + needs_guest: false, + build: |_tc, _inputs| Box::new(SplitEqBindHighLowInvariant), + } +} + +/// Input for the split-eq bind invariants: a number of variables and a +/// seed from which we derive all challenge values deterministically. +#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] +pub struct SplitEqBindInput { + /// Number of variables (clamped to 2..=20 in check). + pub num_vars: u8, + /// Seed bytes used to derive challenge values via simple hashing. 
+ pub seed: [u8; 32], +} + +fn challenges_from_seed(seed: &[u8; 32], count: usize) -> Vec { + use rand::SeedableRng; + use rand_chacha::ChaCha8Rng; + + let mut rng = ChaCha8Rng::from_seed(*seed); + (0..count).map(|_| Challenge::random(&mut rng)).collect() +} + +// ── LowToHigh ──────────────────────────────────────────────────────── + +pub struct SplitEqBindLowHighInvariant; + +impl Invariant for SplitEqBindLowHighInvariant { + type Setup = (); + type Input = SplitEqBindInput; + + fn name(&self) -> &str { + "split_eq_bind_low_high" + } + + fn description(&self) -> String { + "GruenSplitEqPolynomial::bind (LowToHigh) must match \ + DensePolynomial::bound_poly_var_bot at every round." + .to_string() + } + + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::Fuzz + } + + fn setup(&self) {} + + fn check(&self, _setup: &(), input: SplitEqBindInput) -> Result<(), InvariantViolation> { + let num_vars = (input.num_vars as usize).clamp(2, 20); + let challenges = challenges_from_seed(&input.seed, 2 * num_vars); + let (w, rs) = challenges.split_at(num_vars); + + let mut regular_eq = DensePolynomial::::new(EqPolynomial::evals(w)); + let mut split_eq = GruenSplitEqPolynomial::::new(w, BindingOrder::LowToHigh); + + let merged = split_eq.merge(); + if regular_eq.Z[..regular_eq.len()] != merged.Z[..merged.len()] { + return Err(InvariantViolation::with_details( + "Initial merge mismatch (LowToHigh)", + format!("num_vars={num_vars}"), + )); + } + + for (round, r) in rs.iter().enumerate() { + regular_eq.bound_poly_var_bot(r); + split_eq.bind(*r); + + let merged = split_eq.merge(); + if regular_eq.Z[..regular_eq.len()] != merged.Z[..merged.len()] { + return Err(InvariantViolation::with_details( + "Bind mismatch (LowToHigh)", + format!("num_vars={num_vars}, round={round}"), + )); + } + } + + Ok(()) + } + + fn seed_corpus(&self) -> Vec { + vec![ + SplitEqBindInput { + num_vars: 2, + seed: [0u8; 32], + }, + SplitEqBindInput { + num_vars: 10, + seed: [1u8; 32], + }, 
+ SplitEqBindInput { + num_vars: 17, + seed: [42u8; 32], + }, + ] + } +} + +// ── HighToLow ──────────────────────────────────────────────────────── + +pub struct SplitEqBindHighLowInvariant; + +impl Invariant for SplitEqBindHighLowInvariant { + type Setup = (); + type Input = SplitEqBindInput; + + fn name(&self) -> &str { + "split_eq_bind_high_low" + } + + fn description(&self) -> String { + "GruenSplitEqPolynomial::bind (HighToLow) must match \ + DensePolynomial::bound_poly_var_top at every round." + .to_string() + } + + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::Fuzz + } + + fn setup(&self) {} + + fn check(&self, _setup: &(), input: SplitEqBindInput) -> Result<(), InvariantViolation> { + let num_vars = (input.num_vars as usize).clamp(2, 20); + let challenges = challenges_from_seed(&input.seed, 2 * num_vars); + let (w, rs) = challenges.split_at(num_vars); + + let mut regular_eq = DensePolynomial::::new(EqPolynomial::evals(w)); + let mut split_eq = GruenSplitEqPolynomial::::new(w, BindingOrder::HighToLow); + + let merged = split_eq.merge(); + if regular_eq.Z[..regular_eq.len()] != merged.Z[..merged.len()] { + return Err(InvariantViolation::with_details( + "Initial merge mismatch (HighToLow)", + format!("num_vars={num_vars}"), + )); + } + + for (round, r) in rs.iter().enumerate() { + regular_eq.bound_poly_var_top(r); + split_eq.bind(*r); + + let merged = split_eq.merge(); + if regular_eq.Z[..regular_eq.len()] != merged.Z[..merged.len()] { + return Err(InvariantViolation::with_details( + "Bind mismatch (HighToLow)", + format!("num_vars={num_vars}, round={round}"), + )); + } + } + + Ok(()) + } + + fn seed_corpus(&self) -> Vec { + vec![ + SplitEqBindInput { + num_vars: 2, + seed: [0u8; 32], + }, + SplitEqBindInput { + num_vars: 10, + seed: [1u8; 32], + }, + SplitEqBindInput { + num_vars: 17, + seed: [42u8; 32], + }, + ] + } +} diff --git a/jolt-eval/src/invariant/synthesis/fuzz.rs b/jolt-eval/src/invariant/synthesis/fuzz.rs index 
8fc27b100..7467d51e8 100644 --- a/jolt-eval/src/invariant/synthesis/fuzz.rs +++ b/jolt-eval/src/invariant/synthesis/fuzz.rs @@ -20,42 +20,42 @@ use crate::TestCase; /// #![no_main] /// use libfuzzer_sys::fuzz_target; /// fuzz_target!(|data: &[u8]| { -/// jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("soundness", data); +/// jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("split_eq_bind_low_high", data); /// }); /// ``` /// -/// Set `JOLT_FUZZ_ELF` to the path of a pre-compiled guest ELF before -/// running `cargo fuzz`. +/// For invariants that require a guest ELF, set `JOLT_FUZZ_ELF` to the +/// path of a pre-compiled guest ELF before running `cargo fuzz`. +/// Invariants that don't need a guest work without it. pub fn fuzz_invariant(invariant_name: &str, data: &[u8]) { use std::any::Any; use std::sync::LazyLock; - // One-time: build every invariant and its setup from the ELF. struct CachedInvariant { inv: Box, setup: Box, } static CACHE: LazyLock> = LazyLock::new(|| { - let elf_path = std::env::var("JOLT_FUZZ_ELF") - .expect("Set JOLT_FUZZ_ELF to the path of a compiled guest ELF"); - let elf_bytes = - std::fs::read(&elf_path).unwrap_or_else(|e| panic!("Failed to read {elf_path}: {e}")); - let memory_config = common::jolt_device::MemoryConfig { - max_input_size: 4096, - max_output_size: 4096, - max_untrusted_advice_size: 0, - max_trusted_advice_size: 0, - stack_size: 65536, - heap_size: 32768, - program_size: None, - }; - let test_case = Arc::new(TestCase { - elf_contents: elf_bytes, - memory_config, - max_trace_length: 65536, + let test_case: Option> = std::env::var("JOLT_FUZZ_ELF").ok().map(|elf_path| { + let elf_bytes = std::fs::read(&elf_path) + .unwrap_or_else(|e| panic!("Failed to read {elf_path}: {e}")); + let memory_config = common::jolt_device::MemoryConfig { + max_input_size: 4096, + max_output_size: 4096, + max_untrusted_advice_size: common::constants::DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, + max_trusted_advice_size: 
common::constants::DEFAULT_MAX_TRUSTED_ADVICE_SIZE, + stack_size: 65536, + heap_size: 32768, + program_size: None, + }; + Arc::new(TestCase { + elf_contents: elf_bytes, + memory_config, + max_trace_length: 65536, + }) }); - let registry = SynthesisRegistry::from_inventory(Some(test_case), vec![]); + let registry = SynthesisRegistry::from_inventory(test_case, vec![]); registry .into_invariants() .into_iter() @@ -71,15 +71,6 @@ pub fn fuzz_invariant(invariant_name: &str, data: &[u8]) { .find(|c| c.inv.name() == invariant_name) .unwrap_or_else(|| panic!("Invariant '{invariant_name}' not found")); - // Use the fuzzer-provided bytes to produce an Input via Arbitrary, - // by going through the JSON round-trip: Arbitrary -> serde_json -> check_json_input. - // This is the most direct path that uses the fuzzer's data. - // DynInvariant erases the Input type, so we can't call Arbitrary - // on the concrete type directly. Instead, interpret the fuzz data - // as a raw JSON string and feed it through check_json_input. The - // fuzzer will mutate bytes toward valid JSON that deserializes into - // the Input type — this is the standard "structure-aware via serde" - // fuzzing pattern. if let Ok(json_str) = std::str::from_utf8(data) { match cached.inv.check_json_input(&*cached.setup, json_str) { CheckJsonResult::Violation(e) => { diff --git a/jolt-eval/src/invariant/zk_consistency.rs b/jolt-eval/src/invariant/zk_consistency.rs deleted file mode 100644 index e0aded252..000000000 --- a/jolt-eval/src/invariant/zk_consistency.rs +++ /dev/null @@ -1,109 +0,0 @@ -use std::sync::Arc; - -use arbitrary::Arbitrary; -use enumset::EnumSet; - -use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; -use crate::{ProverPreprocessing, TestCase, VerifierPreprocessing}; - -inventory::submit! 
{ - InvariantEntry { - name: "zk_consistency", - targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, - needs_guest: true, - build: |tc, _inputs| Box::new(ZkConsistencyInvariant::new(tc.unwrap())), - } -} - -/// ZK consistency invariant: both `host` and `host,zk` compilation modes -/// produce valid proofs that pass verification. -/// -/// Since the ZK feature is compile-time, this invariant tests whichever mode -/// the binary was compiled with. Run the binary with both feature configurations -/// to get full coverage: -/// cargo nextest run -p jolt-eval --features host -/// cargo nextest run -p jolt-eval --features host,zk -pub struct ZkConsistencyInvariant { - pub test_case: Arc, -} - -pub struct ZkConsistencySetup { - test_case: Arc, - prover_preprocessing: ProverPreprocessing, - verifier_preprocessing: VerifierPreprocessing, -} - -#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] -pub struct ZkInputs { - pub data: Vec, -} - -impl ZkConsistencyInvariant { - pub fn new(test_case: Arc) -> Self { - Self { test_case } - } - - /// Returns which ZK mode the binary was compiled with. - pub fn current_mode() -> &'static str { - // Note: the `zk` feature is on jolt-core, not jolt-eval. - // Detect at runtime by checking if the crate was compiled with it. - "standard" - } -} - -impl Invariant for ZkConsistencyInvariant { - type Setup = ZkConsistencySetup; - type Input = ZkInputs; - - fn name(&self) -> &str { - "zk_consistency" - } - - fn description(&self) -> String { - format!( - "Both host and host+zk modes produce valid proofs. 
\ - Currently running in {} mode.", - Self::current_mode() - ) - } - - fn targets(&self) -> EnumSet { - SynthesisTarget::Test | SynthesisTarget::Fuzz - } - - fn setup(&self) -> Self::Setup { - let prover_pp = self.test_case.prover_preprocessing(); - let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); - ZkConsistencySetup { - test_case: Arc::clone(&self.test_case), - prover_preprocessing: prover_pp, - verifier_preprocessing: verifier_pp, - } - } - - fn check(&self, setup: &Self::Setup, input: ZkInputs) -> Result<(), InvariantViolation> { - let (proof, io_device) = setup - .test_case - .prove(&setup.prover_preprocessing, &input.data); - - if io_device.panic { - return Ok(()); - } - - TestCase::verify(&setup.verifier_preprocessing, proof, &io_device).map_err(|e| { - InvariantViolation::with_details( - format!("Proof verification failed in {} mode", Self::current_mode()), - format!("inputs: {} bytes, error: {e}", input.data.len()), - ) - }) - } - - fn seed_corpus(&self) -> Vec { - vec![ - ZkInputs { data: vec![] }, - ZkInputs { - data: vec![0u8; 16], - }, - ] - } -} From a915d3dfd6357d938417666ea0ffdff57243fc09 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 13:42:51 -0400 Subject: [PATCH 19/86] refactor(jolt-eval): remove inventory crate, use explicit registration Replace inventory::submit!/collect!/iter with explicit arrays in registered_invariants() and registered_objectives(). Each module's entry is now listed directly in its parent mod.rs. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 1 - jolt-eval/Cargo.toml | 1 - jolt-eval/fuzz/Cargo.lock | 2 +- jolt-eval/src/invariant/mod.rs | 26 ++++-- jolt-eval/src/invariant/split_eq_bind.rs | 20 +---- jolt-eval/src/invariant/synthesis/fuzz.rs | 6 +- jolt-eval/src/invariant/synthesis/mod.rs | 4 +- jolt-eval/src/objective/guest_cycles.rs | 13 +-- jolt-eval/src/objective/inline_lengths.rs | 11 +-- jolt-eval/src/objective/mod.rs | 103 ++++++++++++++++++++-- jolt-eval/src/objective/peak_rss.rs | 13 +-- jolt-eval/src/objective/proof_size.rs | 13 +-- jolt-eval/src/objective/prover_time.rs | 13 +-- jolt-eval/src/objective/verifier_time.rs | 14 +-- jolt-eval/src/objective/wrapping_cost.rs | 13 +-- 15 files changed, 125 insertions(+), 128 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f6f408dd4..6864257c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2951,7 +2951,6 @@ dependencies = [ "common", "enumset", "eyre", - "inventory", "jolt-core", "jolt-eval-macros", "postcard", diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index 6a10ed73a..6138bed0e 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -26,7 +26,6 @@ tracing-subscriber = { workspace = true } arbitrary = { version = "1", features = ["derive"] } enumset = "1" schemars = "0.8" -inventory = { workspace = true } tempfile = "3" jolt-eval-macros = { path = "macros" } diff --git a/jolt-eval/fuzz/Cargo.lock b/jolt-eval/fuzz/Cargo.lock index d9d37cde4..0b70f7d61 100644 --- a/jolt-eval/fuzz/Cargo.lock +++ b/jolt-eval/fuzz/Cargo.lock @@ -1017,11 +1017,11 @@ dependencies = [ "common", "enumset", "eyre", - "inventory", "jolt-core", "jolt-eval-macros", "postcard", "rand", + "rand_chacha", "rayon", "schemars", "serde", diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index 477314aa0..0c303ccae 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -89,11 +89,6 @@ pub trait Invariant: Send + Sync { } } -/// Registration entry 
for the [`inventory`] crate. -/// -/// Each built-in invariant module calls `inventory::submit!` with one of -/// these, so all invariants are discoverable at runtime without manual -/// registration. /// Factory function type for constructing an invariant from an optional /// test case and default inputs. pub type InvariantBuildFn = fn(Option>, Vec) -> Box; @@ -105,11 +100,24 @@ pub struct InvariantEntry { pub needs_guest: bool, pub build: InvariantBuildFn, } -inventory::collect!(InvariantEntry); -/// Iterate all invariant entries registered via `inventory`. -pub fn registered_invariants() -> impl Iterator { - inventory::iter::() +/// All registered invariant entries. +pub fn registered_invariants() -> impl Iterator { + [ + InvariantEntry { + name: "split_eq_bind_low_high", + targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, + needs_guest: false, + build: |_tc, _inputs| Box::new(split_eq_bind::SplitEqBindLowHighInvariant), + }, + InvariantEntry { + name: "split_eq_bind_high_low", + targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, + needs_guest: false, + build: |_tc, _inputs| Box::new(split_eq_bind::SplitEqBindHighLowInvariant), + }, + ] + .into_iter() } /// A counterexample produced when an invariant is violated. diff --git a/jolt-eval/src/invariant/split_eq_bind.rs b/jolt-eval/src/invariant/split_eq_bind.rs index 156930066..087bf5e80 100644 --- a/jolt-eval/src/invariant/split_eq_bind.rs +++ b/jolt-eval/src/invariant/split_eq_bind.rs @@ -10,28 +10,10 @@ use jolt_core::poly::eq_poly::EqPolynomial; use jolt_core::poly::multilinear_polynomial::BindingOrder; use jolt_core::poly::split_eq_poly::GruenSplitEqPolynomial; -use super::{Invariant, InvariantEntry, InvariantViolation, SynthesisTarget}; +use super::{Invariant, InvariantViolation, SynthesisTarget}; type Challenge = ::Challenge; -inventory::submit! 
{ - InvariantEntry { - name: "split_eq_bind_low_high", - targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, - needs_guest: false, - build: |_tc, _inputs| Box::new(SplitEqBindLowHighInvariant), - } -} - -inventory::submit! { - InvariantEntry { - name: "split_eq_bind_high_low", - targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, - needs_guest: false, - build: |_tc, _inputs| Box::new(SplitEqBindHighLowInvariant), - } -} - /// Input for the split-eq bind invariants: a number of variables and a /// seed from which we derive all challenge values deterministically. #[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] diff --git a/jolt-eval/src/invariant/synthesis/fuzz.rs b/jolt-eval/src/invariant/synthesis/fuzz.rs index 7467d51e8..9d017c4c6 100644 --- a/jolt-eval/src/invariant/synthesis/fuzz.rs +++ b/jolt-eval/src/invariant/synthesis/fuzz.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use super::super::{registered_invariants, CheckJsonResult, DynInvariant, SynthesisTarget}; +use super::super::{CheckJsonResult, DynInvariant, SynthesisTarget}; use super::SynthesisRegistry; use crate::TestCase; @@ -89,10 +89,10 @@ pub fn fuzzable_invariants(registry: &SynthesisRegistry) -> Vec<&dyn DynInvarian registry.for_target(SynthesisTarget::Fuzz) } -/// Return the names of all `inventory`-registered invariants that +/// Return the names of all registered invariants that /// include [`SynthesisTarget::Fuzz`]. 
pub fn fuzzable_invariant_names() -> Vec<&'static str> { - registered_invariants() + super::super::registered_invariants() .filter(|e| (e.targets)().contains(SynthesisTarget::Fuzz)) .map(|e| e.name) .collect() diff --git a/jolt-eval/src/invariant/synthesis/mod.rs b/jolt-eval/src/invariant/synthesis/mod.rs index 70c875090..6916162d7 100644 --- a/jolt-eval/src/invariant/synthesis/mod.rs +++ b/jolt-eval/src/invariant/synthesis/mod.rs @@ -19,7 +19,7 @@ impl SynthesisRegistry { } } - /// Build a registry from all `inventory`-registered invariants. + /// Build a registry from all registered invariants. /// /// Pass `None` to include only invariants that don't require a guest /// program (those with `needs_guest: false`). @@ -74,7 +74,7 @@ impl Default for SynthesisRegistry { } } -/// Return the names of all `inventory`-registered invariants. +/// Return the names of all registered invariants. /// Does not require a `TestCase`. pub fn invariant_names() -> Vec<&'static str> { registered_invariants().map(|e| e.name).collect() diff --git a/jolt-eval/src/objective/guest_cycles.rs b/jolt-eval/src/objective/guest_cycles.rs index c47694621..40ff488a3 100644 --- a/jolt-eval/src/objective/guest_cycles.rs +++ b/jolt-eval/src/objective/guest_cycles.rs @@ -1,19 +1,8 @@ use std::sync::Arc; -use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; +use super::{AbstractObjective, Direction, MeasurementError}; use crate::TestCase; -inventory::submit! { - ObjectiveEntry { - name: "guest_cycle_count", - direction: Direction::Minimize, - needs_guest: true, - build: |s, inputs| { let setup = s.unwrap(); Box::new(GuestCycleCountObjective::new( - setup.test_case.clone(), inputs, - )) }, - } -} - /// Measures guest instruction cycle count via program tracing. 
pub struct GuestCycleCountObjective { pub test_case: Arc, diff --git a/jolt-eval/src/objective/inline_lengths.rs b/jolt-eval/src/objective/inline_lengths.rs index 6eee4e5f5..54136a3b1 100644 --- a/jolt-eval/src/objective/inline_lengths.rs +++ b/jolt-eval/src/objective/inline_lengths.rs @@ -1,17 +1,8 @@ use std::sync::Arc; -use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; +use super::{AbstractObjective, Direction, MeasurementError}; use crate::TestCase; -inventory::submit! { - ObjectiveEntry { - name: "inline_lengths", - direction: Direction::Maximize, - needs_guest: true, - build: |s, _inputs| { let setup = s.unwrap(); Box::new(InlineLengthsObjective::new(setup.test_case.clone())) }, - } -} - /// Measures total virtual/inline sequence length in the decoded bytecode. /// /// Inline sequences replace guest-side computation with constraint-native diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 259940887..153962bae 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -61,10 +61,6 @@ pub trait AbstractObjective: Send + Sync { fn direction(&self) -> Direction; } -/// Registration entry for the [`inventory`] crate. -/// -/// Each built-in objective module calls `inventory::submit!` with one of -/// these, so all objectives are discoverable at runtime. pub struct ObjectiveEntry { pub name: &'static str, pub direction: Direction, @@ -72,11 +68,100 @@ pub struct ObjectiveEntry { pub needs_guest: bool, pub build: fn(Option<&SharedSetup>, Vec) -> Box, } -inventory::collect!(ObjectiveEntry); -/// Iterate all objective entries registered via `inventory`. -pub fn registered_objectives() -> impl Iterator { - inventory::iter::() +/// All registered objective entries. 
+pub fn registered_objectives() -> impl Iterator { + [ + ObjectiveEntry { + name: "peak_rss", + direction: Direction::Minimize, + needs_guest: true, + build: |s, inputs| { + let setup = s.unwrap(); + Box::new(peak_rss::PeakRssObjective::new( + setup.test_case.clone(), + setup.prover_preprocessing.clone(), + inputs, + )) + }, + }, + ObjectiveEntry { + name: "prover_time", + direction: Direction::Minimize, + needs_guest: true, + build: |s, inputs| { + let setup = s.unwrap(); + Box::new(prover_time::ProverTimeObjective::new( + setup.test_case.clone(), + setup.prover_preprocessing.clone(), + inputs, + )) + }, + }, + ObjectiveEntry { + name: "proof_size", + direction: Direction::Minimize, + needs_guest: true, + build: |s, inputs| { + let setup = s.unwrap(); + Box::new(proof_size::ProofSizeObjective::new( + setup.test_case.clone(), + setup.prover_preprocessing.clone(), + inputs, + )) + }, + }, + ObjectiveEntry { + name: "verifier_time", + direction: Direction::Minimize, + needs_guest: true, + build: |s, inputs| { + let setup = s.unwrap(); + Box::new(verifier_time::VerifierTimeObjective::new( + setup.test_case.clone(), + setup.prover_preprocessing.clone(), + setup.verifier_preprocessing.clone(), + inputs, + )) + }, + }, + ObjectiveEntry { + name: "guest_cycle_count", + direction: Direction::Minimize, + needs_guest: true, + build: |s, inputs| { + let setup = s.unwrap(); + Box::new(guest_cycles::GuestCycleCountObjective::new( + setup.test_case.clone(), + inputs, + )) + }, + }, + ObjectiveEntry { + name: "inline_lengths", + direction: Direction::Maximize, + needs_guest: true, + build: |s, _inputs| { + let setup = s.unwrap(); + Box::new(inline_lengths::InlineLengthsObjective::new( + setup.test_case.clone(), + )) + }, + }, + ObjectiveEntry { + name: "wrapping_cost", + direction: Direction::Minimize, + needs_guest: true, + build: |s, _inputs| { + let setup = s.unwrap(); + Box::new(wrapping_cost::WrappingCostObjective::new( + setup.test_case.clone(), + 
setup.prover_preprocessing.clone(), + )) + }, + }, + ] + .into_iter() } /// Build all registered objectives from a [`SharedSetup`]. @@ -86,7 +171,7 @@ pub fn build_objectives_from_inventory( setup: Option<&SharedSetup>, inputs: Vec, ) -> Vec> { - inventory::iter::() + registered_objectives() .filter(|entry| !entry.needs_guest || setup.is_some()) .map(|entry| (entry.build)(setup, inputs.clone())) .collect() diff --git a/jolt-eval/src/objective/peak_rss.rs b/jolt-eval/src/objective/peak_rss.rs index d8d8577aa..ca49f924a 100644 --- a/jolt-eval/src/objective/peak_rss.rs +++ b/jolt-eval/src/objective/peak_rss.rs @@ -2,20 +2,9 @@ use std::sync::Arc; use sysinfo::{Pid, System}; -use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; +use super::{AbstractObjective, Direction, MeasurementError}; use crate::{ProverPreprocessing, TestCase}; -inventory::submit! { - ObjectiveEntry { - name: "peak_rss", - direction: Direction::Minimize, - needs_guest: true, - build: |s, inputs| { let setup = s.unwrap(); Box::new(PeakRssObjective::new( - setup.test_case.clone(), setup.prover_preprocessing.clone(), inputs, - )) }, - } -} - /// Measures peak resident set size (RSS) during proving. /// /// Uses the `sysinfo` crate to sample memory before and after proving. diff --git a/jolt-eval/src/objective/proof_size.rs b/jolt-eval/src/objective/proof_size.rs index 321d950c5..29211c2db 100644 --- a/jolt-eval/src/objective/proof_size.rs +++ b/jolt-eval/src/objective/proof_size.rs @@ -1,19 +1,8 @@ use std::sync::Arc; -use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; +use super::{AbstractObjective, Direction, MeasurementError}; use crate::{serialize_proof, ProverPreprocessing, TestCase}; -inventory::submit! 
{ - ObjectiveEntry { - name: "proof_size", - direction: Direction::Minimize, - needs_guest: true, - build: |s, inputs| { let setup = s.unwrap(); Box::new(ProofSizeObjective::new( - setup.test_case.clone(), setup.prover_preprocessing.clone(), inputs, - )) }, - } -} - /// Measures serialized proof size in bytes. pub struct ProofSizeObjective { pub test_case: Arc, diff --git a/jolt-eval/src/objective/prover_time.rs b/jolt-eval/src/objective/prover_time.rs index b083a3696..7b839f576 100644 --- a/jolt-eval/src/objective/prover_time.rs +++ b/jolt-eval/src/objective/prover_time.rs @@ -1,20 +1,9 @@ use std::sync::Arc; use std::time::Instant; -use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; +use super::{AbstractObjective, Direction, MeasurementError}; use crate::{ProverPreprocessing, TestCase}; -inventory::submit! { - ObjectiveEntry { - name: "prover_time", - direction: Direction::Minimize, - needs_guest: true, - build: |s, inputs| { let setup = s.unwrap(); Box::new(ProverTimeObjective::new( - setup.test_case.clone(), setup.prover_preprocessing.clone(), inputs, - )) }, - } -} - /// Measures wall-clock prover time in seconds. pub struct ProverTimeObjective { pub test_case: Arc, diff --git a/jolt-eval/src/objective/verifier_time.rs b/jolt-eval/src/objective/verifier_time.rs index 4936799c6..1223f95a9 100644 --- a/jolt-eval/src/objective/verifier_time.rs +++ b/jolt-eval/src/objective/verifier_time.rs @@ -1,21 +1,9 @@ use std::sync::Arc; use std::time::Instant; -use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; +use super::{AbstractObjective, Direction, MeasurementError}; use crate::{ProverPreprocessing, TestCase, VerifierPreprocessing}; -inventory::submit! 
{ - ObjectiveEntry { - name: "verifier_time", - direction: Direction::Minimize, - needs_guest: true, - build: |s, inputs| { let setup = s.unwrap(); Box::new(VerifierTimeObjective::new( - setup.test_case.clone(), setup.prover_preprocessing.clone(), - setup.verifier_preprocessing.clone(), inputs, - )) }, - } -} - /// Measures wall-clock verifier time in seconds. pub struct VerifierTimeObjective { pub test_case: Arc, diff --git a/jolt-eval/src/objective/wrapping_cost.rs b/jolt-eval/src/objective/wrapping_cost.rs index 972201a90..dfcddb924 100644 --- a/jolt-eval/src/objective/wrapping_cost.rs +++ b/jolt-eval/src/objective/wrapping_cost.rs @@ -1,19 +1,8 @@ use std::sync::Arc; -use super::{AbstractObjective, Direction, MeasurementError, ObjectiveEntry}; +use super::{AbstractObjective, Direction, MeasurementError}; use crate::{ProverPreprocessing, TestCase}; -inventory::submit! { - ObjectiveEntry { - name: "wrapping_cost", - direction: Direction::Minimize, - needs_guest: true, - build: |s, _inputs| { let setup = s.unwrap(); Box::new(WrappingCostObjective::new( - setup.test_case.clone(), setup.prover_preprocessing.clone(), - )) }, - } -} - /// Measures the "wrapping cost" as the total number of constraints in the R1CS. /// /// This is derived from the preprocessing data which encodes the constraint From cc623f43ee087cb746c38f5874fc2e21d31bb535 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 13:52:11 -0400 Subject: [PATCH 20/86] cleanup(jolt-eval): remove fuzz binary, redundant with check-invariants The fuzz binary was a random-input runner largely duplicating check-invariants --num-random. The libfuzzer scaffolding in jolt-eval/fuzz/ is kept for cargo-fuzz usage. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/Cargo.toml | 4 - jolt-eval/bin/fuzz.rs | 194 ---------------------- jolt-eval/src/invariant/synthesis/fuzz.rs | 15 +- 3 files changed, 1 insertion(+), 212 deletions(-) delete mode 100644 jolt-eval/bin/fuzz.rs diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index 6138bed0e..23821ddeb 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -42,10 +42,6 @@ path = "bin/measure_objectives.rs" name = "redteam" path = "bin/redteam.rs" -[[bin]] -name = "fuzz" -path = "bin/fuzz.rs" - [[bin]] name = "optimize" path = "bin/optimize.rs" diff --git a/jolt-eval/bin/fuzz.rs b/jolt-eval/bin/fuzz.rs deleted file mode 100644 index 9155af4a7..000000000 --- a/jolt-eval/bin/fuzz.rs +++ /dev/null @@ -1,194 +0,0 @@ -use std::time::{Duration, Instant}; - -use clap::Parser; - -use jolt_eval::guests; -use jolt_eval::invariant::synthesis::{invariant_names, SynthesisRegistry}; -use jolt_eval::invariant::{DynInvariant, InvariantReport, SynthesisTarget}; - -#[derive(Parser)] -#[command(name = "fuzz")] -#[command(about = "Fuzz-test Jolt invariants with random inputs")] -struct Cli { - /// Guest program to evaluate (e.g. muldiv, fibonacci, sha2) - #[arg(long)] - guest: Option, - - /// Path to a pre-compiled guest ELF (alternative to --guest) - #[arg(long)] - elf: Option, - - /// Only fuzz the named invariant (default: all fuzzable) - #[arg(long)] - invariant: Option, - - /// Total number of fuzz iterations (across all invariants) - #[arg(long, default_value = "1000")] - iterations: usize, - - /// Maximum wall-clock duration (e.g. 
"60s", "5m", "1h") - #[arg(long)] - duration: Option, - - /// Max trace length override - #[arg(long)] - max_trace_length: Option, - - /// List available fuzzable invariants and exit - #[arg(long)] - list: bool, -} - -fn main() -> eyre::Result<()> { - tracing_subscriber::fmt::init(); - let cli = Cli::parse(); - - if cli.list { - println!("Fuzzable invariants:"); - for name in invariant_names() { - println!(" {name}"); - } - return Ok(()); - } - - let (test_case, default_inputs) = guests::resolve_test_case( - cli.guest.as_deref(), - cli.elf.as_deref(), - cli.max_trace_length, - ); - - let registry = SynthesisRegistry::from_inventory(Some(test_case), default_inputs); - - let fuzzable: Vec<&dyn DynInvariant> = if let Some(name) = &cli.invariant { - let matches: Vec<_> = registry - .for_target(SynthesisTarget::Fuzz) - .into_iter() - .filter(|inv| inv.name() == name.as_str()) - .collect(); - if matches.is_empty() { - eprintln!("Invariant '{name}' not found or not fuzzable."); - eprintln!("Run with --list to see available invariants."); - std::process::exit(1); - } - matches - } else { - registry.for_target(SynthesisTarget::Fuzz) - }; - - if fuzzable.is_empty() { - eprintln!("No fuzzable invariants registered."); - std::process::exit(1); - } - - let deadline = cli.duration.as_deref().map(|s| { - let dur = parse_duration(s).unwrap_or_else(|| { - eprintln!("Invalid duration '{s}'. Use e.g. 
60s, 5m, 1h."); - std::process::exit(1); - }); - Instant::now() + dur - }); - - println!( - "Fuzzing {} invariant(s), {} iterations", - fuzzable.len(), - cli.iterations, - ); - if let Some(d) = &cli.duration { - println!("Time limit: {d}"); - } - println!(); - - let mut total_checks = 0usize; - let mut total_violations = 0usize; - let start = Instant::now(); - - for inv in &fuzzable { - println!(" {} — setting up...", inv.name()); - - let per_invariant = cli.iterations / fuzzable.len(); - let mut checks = 0usize; - let mut violations = Vec::new(); - - let batch_size = per_invariant.min(100); - let mut remaining = per_invariant; - - while remaining > 0 { - if let Some(dl) = deadline { - if Instant::now() >= dl { - println!(" (time limit reached)"); - break; - } - } - - let n = remaining.min(batch_size); - let results = inv.run_checks(n); - for r in &results { - checks += 1; - if let Err(e) = r { - violations.push(e.to_string()); - } - } - remaining = remaining.saturating_sub(n); - } - - let report = InvariantReport { - name: inv.name().to_string(), - total: checks, - passed: checks - violations.len(), - failed: violations.len(), - violations: violations.clone(), - }; - print_report(&report); - - total_checks += checks; - total_violations += violations.len(); - } - - let elapsed = start.elapsed(); - println!(); - println!( - "Done: {} checks in {:.1}s, {} violations", - total_checks, - elapsed.as_secs_f64(), - total_violations, - ); - - if total_violations > 0 { - std::process::exit(1); - } - - Ok(()) -} - -fn print_report(report: &InvariantReport) { - if report.failed == 0 { - println!( - " {} — {}/{} passed", - report.name, report.passed, report.total - ); - } else { - println!( - " {} — FAILED {}/{} checks", - report.name, report.failed, report.total - ); - for (i, v) in report.violations.iter().enumerate().take(5) { - println!(" [{i}] {v}"); - } - if report.violations.len() > 5 { - println!(" ... 
and {} more", report.violations.len() - 5); - } - } -} - -fn parse_duration(s: &str) -> Option { - let s = s.trim(); - if let Some(n) = s.strip_suffix('s') { - n.parse::().ok().map(Duration::from_secs) - } else if let Some(n) = s.strip_suffix('m') { - n.parse::().ok().map(|m| Duration::from_secs(m * 60)) - } else if let Some(n) = s.strip_suffix('h') { - n.parse::().ok().map(|h| Duration::from_secs(h * 3600)) - } else { - s.parse::().ok().map(Duration::from_secs) - } -} diff --git a/jolt-eval/src/invariant/synthesis/fuzz.rs b/jolt-eval/src/invariant/synthesis/fuzz.rs index 9d017c4c6..9a24c772d 100644 --- a/jolt-eval/src/invariant/synthesis/fuzz.rs +++ b/jolt-eval/src/invariant/synthesis/fuzz.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use super::super::{CheckJsonResult, DynInvariant, SynthesisTarget}; +use super::super::{CheckJsonResult, DynInvariant}; use super::SynthesisRegistry; use crate::TestCase; @@ -84,16 +84,3 @@ pub fn fuzz_invariant(invariant_name: &str, data: &[u8]) { } } -/// List all invariants suitable for fuzz target generation. -pub fn fuzzable_invariants(registry: &SynthesisRegistry) -> Vec<&dyn DynInvariant> { - registry.for_target(SynthesisTarget::Fuzz) -} - -/// Return the names of all registered invariants that -/// include [`SynthesisTarget::Fuzz`]. -pub fn fuzzable_invariant_names() -> Vec<&'static str> { - super::super::registered_invariants() - .filter(|e| (e.targets)().contains(SynthesisTarget::Fuzz)) - .map(|e| e.name) - .collect() -} From 0200fa7fe6e4e2d4574ef75750f34bca1be435ba Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 14:16:27 -0400 Subject: [PATCH 21/86] refactor(jolt-eval): replace DynInvariant/SynthesisRegistry with JoltInvariants enum Remove the DynInvariant trait, SynthesisRegistry, and InvariantEntry in favor of a JoltInvariants enum with match-based method dispatch. auto_redteam is now generic over I: Invariant, eliminating the need for type erasure in the red-team path and keeping tests simple. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/check_invariants.rs | 62 ++---- jolt-eval/bin/optimize.rs | 10 +- jolt-eval/bin/redteam.rs | 46 ++--- jolt-eval/src/invariant/mod.rs | 206 ++++++++----------- jolt-eval/src/invariant/synthesis/fuzz.rs | 70 ++----- jolt-eval/src/invariant/synthesis/mod.rs | 78 ------- jolt-eval/src/invariant/synthesis/redteam.rs | 105 +++++----- jolt-eval/src/invariant/synthesis/test.rs | 30 +-- jolt-eval/src/lib.rs | 6 +- jolt-eval/tests/integration.rs | 36 ++-- 10 files changed, 223 insertions(+), 426 deletions(-) diff --git a/jolt-eval/bin/check_invariants.rs b/jolt-eval/bin/check_invariants.rs index a8b8e328d..3840c87fd 100644 --- a/jolt-eval/bin/check_invariants.rs +++ b/jolt-eval/bin/check_invariants.rs @@ -1,22 +1,12 @@ use clap::Parser; use tracing::info; -use jolt_eval::guests; -use jolt_eval::invariant::synthesis::{invariant_names, SynthesisRegistry}; -use jolt_eval::invariant::{DynInvariant, InvariantReport}; +use jolt_eval::invariant::{InvariantReport, JoltInvariants}; #[derive(Parser)] #[command(name = "check-invariants")] #[command(about = "Run Jolt invariant checks")] struct Cli { - /// Guest program to evaluate (e.g. 
muldiv, fibonacci, sha2) - #[arg(long)] - guest: Option, - - /// Path to a pre-compiled guest ELF (alternative to --guest) - #[arg(long)] - elf: Option, - /// Only run the named invariant (default: all) #[arg(long)] invariant: Option, @@ -24,48 +14,32 @@ struct Cli { /// Number of random inputs per invariant #[arg(long, default_value = "10")] num_random: usize, - - /// Max trace length override - #[arg(long)] - max_trace_length: Option, } fn main() -> eyre::Result<()> { tracing_subscriber::fmt::init(); let cli = Cli::parse(); - let (test_case, default_inputs) = guests::resolve_test_case( - cli.guest.as_deref(), - cli.elf.as_deref(), - cli.max_trace_length, - ); - - let registry = SynthesisRegistry::from_inventory(Some(test_case), default_inputs); - - let invariants: Vec<&dyn DynInvariant> = if let Some(name) = &cli.invariant { - registry - .invariants() - .iter() - .filter(|inv| inv.name() == name.as_str()) - .map(|inv| inv.as_ref()) - .collect() + let all = JoltInvariants::all(); + let invariants: Vec<_> = if let Some(name) = &cli.invariant { + let filtered: Vec<_> = all + .into_iter() + .filter(|inv| inv.name().contains(name.as_str())) + .collect(); + if filtered.is_empty() { + let all_inv = JoltInvariants::all(); + let names: Vec<_> = all_inv.iter().map(|i| i.name()).collect(); + eprintln!( + "Invariant '{name}' not found. 
Available: {}", + names.join(", ") + ); + std::process::exit(1); + } + filtered } else { - registry - .invariants() - .iter() - .map(|inv| inv.as_ref()) - .collect() + all }; - if invariants.is_empty() { - eprintln!("No matching invariants found."); - if let Some(name) = &cli.invariant { - eprintln!("Available: {}", invariant_names().join(", ")); - eprintln!("Requested: {name}"); - } - std::process::exit(1); - } - let mut all_passed = true; for inv in &invariants { info!("Running invariant: {}", inv.name()); diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 3943490c1..f22c9dd70 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -5,7 +5,7 @@ use clap::Parser; use jolt_eval::agent::ClaudeCodeAgent; use jolt_eval::guests; -use jolt_eval::invariant::synthesis::SynthesisRegistry; +use jolt_eval::invariant::JoltInvariants; use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; use jolt_eval::objective::{ build_objectives_from_inventory, measure_dyn, AbstractObjective, Direction, @@ -51,7 +51,7 @@ struct Cli { struct RealEnv { objectives: Vec>, - registry: SynthesisRegistry, + invariants: Vec, repo_dir: std::path::PathBuf, } @@ -61,7 +61,7 @@ impl OptimizeEnv for RealEnv { } fn check_invariants(&mut self) -> bool { - self.registry.invariants().iter().all(|inv| { + self.invariants.iter().all(|inv| { let results = inv.run_checks(0); results.iter().all(|r| r.is_ok()) }) @@ -141,12 +141,12 @@ fn main() -> eyre::Result<()> { std::process::exit(1); } - let registry = SynthesisRegistry::from_inventory(Some(test_case), default_inputs); + let invariants = JoltInvariants::all(); let repo_dir = std::env::current_dir()?; let mut env = RealEnv { objectives, - registry, + invariants, repo_dir: repo_dir.clone(), }; diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index 344ff3a56..583d99ea6 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -2,23 +2,13 @@ use clap::Parser; use 
tracing::info; use jolt_eval::agent::ClaudeCodeAgent; -use jolt_eval::guests; use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; -use jolt_eval::invariant::synthesis::{invariant_names, SynthesisRegistry}; -use jolt_eval::invariant::SynthesisTarget; +use jolt_eval::invariant::{JoltInvariants, SynthesisTarget}; #[derive(Parser)] #[command(name = "redteam")] #[command(about = "AI-driven red team testing of Jolt invariants")] struct Cli { - /// Guest program to evaluate (e.g. muldiv, fibonacci, sha2) - #[arg(long)] - guest: Option, - - /// Path to a pre-compiled guest ELF (alternative to --guest) - #[arg(long)] - elf: Option, - /// Name of the invariant to test #[arg(long)] invariant: String, @@ -35,10 +25,6 @@ struct Cli { #[arg(long, default_value = "30")] max_turns: usize, - /// Max trace length override - #[arg(long)] - max_trace_length: Option, - /// List available red-teamable invariants and exit #[arg(long)] list: bool, @@ -50,23 +36,18 @@ fn main() -> eyre::Result<()> { if cli.list { println!("Red-teamable invariants:"); - for name in invariant_names() { - println!(" {name}"); + for inv in &JoltInvariants::all() { + if inv.targets().contains(SynthesisTarget::RedTeam) { + println!(" {}", inv.name()); + } } return Ok(()); } - let (test_case, default_inputs) = guests::resolve_test_case( - cli.guest.as_deref(), - cli.elf.as_deref(), - cli.max_trace_length, - ); - - let registry = SynthesisRegistry::from_inventory(Some(test_case), default_inputs); - - let invariant = registry - .for_target(SynthesisTarget::RedTeam) - .into_iter() + let all = JoltInvariants::all(); + let invariant = all + .iter() + .filter(|inv| inv.targets().contains(SynthesisTarget::RedTeam)) .find(|inv| inv.name() == cli.invariant.as_str()); let Some(invariant) = invariant else { @@ -89,7 +70,14 @@ fn main() -> eyre::Result<()> { cli.invariant, cli.iterations, cli.model ); - let result = auto_redteam(invariant, &config, &agent, &repo_dir); + let result = 
match invariant { + JoltInvariants::SplitEqBindLowHigh(inv) => { + auto_redteam(inv, &config, &agent, &repo_dir) + } + JoltInvariants::SplitEqBindHighLow(inv) => { + auto_redteam(inv, &config, &agent, &repo_dir) + } + }; match result { RedTeamResult::Violation { diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index 0c303ccae..2f74c79d3 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -3,7 +3,6 @@ pub mod synthesis; use std::any::Any; use std::fmt; -use std::sync::Arc; use arbitrary::Arbitrary; use enumset::{EnumSet, EnumSetType}; @@ -12,8 +11,6 @@ use schemars::JsonSchema; use serde::de::DeserializeOwned; use serde::Serialize; -use crate::TestCase; - /// What to synthesize from an invariant definition. #[derive(Debug, EnumSetType)] pub enum SynthesisTarget { @@ -89,35 +86,95 @@ pub trait Invariant: Send + Sync { } } -/// Factory function type for constructing an invariant from an optional -/// test case and default inputs. -pub type InvariantBuildFn = fn(Option>, Vec) -> Box; +/// Enum collecting all Jolt invariants. Methods dispatch via match. +pub enum JoltInvariants { + SplitEqBindLowHigh(split_eq_bind::SplitEqBindLowHighInvariant), + SplitEqBindHighLow(split_eq_bind::SplitEqBindHighLowInvariant), +} + +macro_rules! 
dispatch { + ($self:expr, |$inv:ident| $body:expr) => { + match $self { + JoltInvariants::SplitEqBindLowHigh($inv) => $body, + JoltInvariants::SplitEqBindHighLow($inv) => $body, + } + }; +} + +impl JoltInvariants { + pub fn all() -> Vec { + vec![ + Self::SplitEqBindLowHigh(split_eq_bind::SplitEqBindLowHighInvariant), + Self::SplitEqBindHighLow(split_eq_bind::SplitEqBindHighLowInvariant), + ] + } + + pub fn name(&self) -> &str { + dispatch!(self, |inv| inv.name()) + } + + pub fn description(&self) -> String { + dispatch!(self, |inv| inv.description()) + } + + pub fn targets(&self) -> EnumSet { + dispatch!(self, |inv| inv.targets()) + } + + pub fn run_checks(&self, num_random: usize) -> Vec> { + dispatch!(self, |inv| run_checks_impl(inv, num_random)) + } + + pub fn dyn_setup(&self) -> Box { + dispatch!(self, |inv| dyn_setup_impl(inv)) + } + + pub fn check_json_input(&self, setup: &dyn Any, json: &str) -> CheckJsonResult { + dispatch!(self, |inv| check_json_input_impl(inv, setup, json)) + } +} + +fn run_checks_impl(inv: &I, num_random: usize) -> Vec> { + let setup = inv.setup(); + let mut results = Vec::new(); + + for input in inv.seed_corpus() { + results.push(inv.check(&setup, input)); + } + + let mut rng = rand::thread_rng(); + for _ in 0..num_random { + let mut raw = vec![0u8; 4096]; + rng.fill_bytes(&mut raw); + let mut u = arbitrary::Unstructured::new(&raw); + if let Ok(input) = I::Input::arbitrary(&mut u) { + results.push(inv.check(&setup, input)); + } + } + + results +} -pub struct InvariantEntry { - pub name: &'static str, - pub targets: fn() -> EnumSet, - /// Whether this invariant requires a compiled guest program. - pub needs_guest: bool, - pub build: InvariantBuildFn, +fn dyn_setup_impl(inv: &I) -> Box { + Box::new(inv.setup()) } -/// All registered invariant entries. 
-pub fn registered_invariants() -> impl Iterator { - [ - InvariantEntry { - name: "split_eq_bind_low_high", - targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, - needs_guest: false, - build: |_tc, _inputs| Box::new(split_eq_bind::SplitEqBindLowHighInvariant), - }, - InvariantEntry { - name: "split_eq_bind_high_low", - targets: || SynthesisTarget::Test | SynthesisTarget::Fuzz, - needs_guest: false, - build: |_tc, _inputs| Box::new(split_eq_bind::SplitEqBindHighLowInvariant), - }, - ] - .into_iter() +fn check_json_input_impl( + inv: &I, + setup: &dyn Any, + json: &str, +) -> CheckJsonResult { + let setup = setup + .downcast_ref::() + .expect("check_json_input called with wrong setup type"); + let input: I::Input = match serde_json::from_str(json) { + Ok(v) => v, + Err(e) => return CheckJsonResult::BadInput(e.to_string()), + }; + match inv.check(setup, input) { + Ok(()) => CheckJsonResult::Pass, + Err(v) => CheckJsonResult::Violation(v), + } } /// A counterexample produced when an invariant is violated. @@ -134,31 +191,7 @@ pub struct FailedAttempt { pub failure_reason: String, } -/// Object-safe wrapper for `Invariant`, enabling heterogeneous collections -/// and JSON-based counterexample checking. -pub trait DynInvariant: Send + Sync { - fn name(&self) -> &str; - fn description(&self) -> String; - fn targets(&self) -> EnumSet; - - /// Run seed corpus checks followed by `num_random` randomly-generated inputs. - fn run_checks(&self, num_random: usize) -> Vec>; - - /// Return a JSON example of the `Input` type (from the seed corpus). - fn input_json_example(&self) -> Option; - - /// Return the JSON Schema for the `Input` type. - fn input_json_schema(&self) -> serde_json::Value; - - /// Create the (type-erased) setup. Expensive — call once and reuse. - fn dyn_setup(&self) -> Box; - - /// Deserialize a JSON-encoded `Input` and check it against a - /// previously-created setup (from [`dyn_setup`]). 
- fn check_json_input(&self, setup: &dyn Any, json: &str) -> CheckJsonResult; -} - -/// Outcome of [`DynInvariant::check_json_input`]. +/// Outcome of [`JoltInvariants::check_json_input`]. pub enum CheckJsonResult { /// The input was valid and the invariant held. Pass, @@ -168,71 +201,6 @@ pub enum CheckJsonResult { BadInput(String), } -impl DynInvariant for I { - fn name(&self) -> &str { - Invariant::name(self) - } - - fn description(&self) -> String { - Invariant::description(self) - } - - fn targets(&self) -> EnumSet { - Invariant::targets(self) - } - - fn run_checks(&self, num_random: usize) -> Vec> { - let setup = self.setup(); - let mut results = Vec::new(); - - for input in self.seed_corpus() { - results.push(self.check(&setup, input)); - } - - let mut rng = rand::thread_rng(); - for _ in 0..num_random { - let mut raw = vec![0u8; 4096]; - rng.fill_bytes(&mut raw); - let mut u = arbitrary::Unstructured::new(&raw); - if let Ok(input) = I::Input::arbitrary(&mut u) { - results.push(self.check(&setup, input)); - } - } - - results - } - - fn input_json_example(&self) -> Option { - self.seed_corpus() - .into_iter() - .next() - .and_then(|input| serde_json::to_string_pretty(&input).ok()) - } - - fn input_json_schema(&self) -> serde_json::Value { - let schema = schemars::schema_for!(I::Input); - serde_json::to_value(schema).unwrap() - } - - fn dyn_setup(&self) -> Box { - Box::new(Invariant::setup(self)) - } - - fn check_json_input(&self, setup: &dyn Any, json: &str) -> CheckJsonResult { - let setup = setup - .downcast_ref::() - .expect("DynInvariant::check_json_input called with wrong setup type"); - let input: I::Input = match serde_json::from_str(json) { - Ok(v) => v, - Err(e) => return CheckJsonResult::BadInput(e.to_string()), - }; - match self.check(setup, input) { - Ok(()) => CheckJsonResult::Pass, - Err(v) => CheckJsonResult::Violation(v), - } - } -} - /// Result of running an invariant check suite. 
pub struct InvariantReport { pub name: String, diff --git a/jolt-eval/src/invariant/synthesis/fuzz.rs b/jolt-eval/src/invariant/synthesis/fuzz.rs index 9a24c772d..329594f9b 100644 --- a/jolt-eval/src/invariant/synthesis/fuzz.rs +++ b/jolt-eval/src/invariant/synthesis/fuzz.rs @@ -1,16 +1,25 @@ -use std::sync::Arc; +use std::any::Any; +use std::sync::LazyLock; -use super::super::{CheckJsonResult, DynInvariant}; -use super::SynthesisRegistry; -use crate::TestCase; +use super::super::{CheckJsonResult, JoltInvariants}; + +struct CachedInvariant { + inv: JoltInvariants, + setup: Box, +} + +static CACHE: LazyLock> = LazyLock::new(|| { + JoltInvariants::all() + .into_iter() + .map(|inv| { + let setup = inv.dyn_setup(); + CachedInvariant { inv, setup } + }) + .collect() +}); /// Fuzz a named invariant with raw byte data from libfuzzer. /// -/// `data` is fed through `arbitrary::Unstructured` to produce the -/// invariant's `Input` type, which is then checked against the -/// invariant. Setup is performed once and cached for the process -/// lifetime. -/// /// Panics on invariant violation (which is what libfuzzer needs to /// detect a finding). /// @@ -23,49 +32,7 @@ use crate::TestCase; /// jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("split_eq_bind_low_high", data); /// }); /// ``` -/// -/// For invariants that require a guest ELF, set `JOLT_FUZZ_ELF` to the -/// path of a pre-compiled guest ELF before running `cargo fuzz`. -/// Invariants that don't need a guest work without it. 
pub fn fuzz_invariant(invariant_name: &str, data: &[u8]) { - use std::any::Any; - use std::sync::LazyLock; - - struct CachedInvariant { - inv: Box, - setup: Box, - } - - static CACHE: LazyLock> = LazyLock::new(|| { - let test_case: Option> = std::env::var("JOLT_FUZZ_ELF").ok().map(|elf_path| { - let elf_bytes = std::fs::read(&elf_path) - .unwrap_or_else(|e| panic!("Failed to read {elf_path}: {e}")); - let memory_config = common::jolt_device::MemoryConfig { - max_input_size: 4096, - max_output_size: 4096, - max_untrusted_advice_size: common::constants::DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, - max_trusted_advice_size: common::constants::DEFAULT_MAX_TRUSTED_ADVICE_SIZE, - stack_size: 65536, - heap_size: 32768, - program_size: None, - }; - Arc::new(TestCase { - elf_contents: elf_bytes, - memory_config, - max_trace_length: 65536, - }) - }); - let registry = SynthesisRegistry::from_inventory(test_case, vec![]); - registry - .into_invariants() - .into_iter() - .map(|inv| { - let setup = inv.dyn_setup(); - CachedInvariant { inv, setup } - }) - .collect() - }); - let cached = CACHE .iter() .find(|c| c.inv.name() == invariant_name) @@ -83,4 +50,3 @@ pub fn fuzz_invariant(invariant_name: &str, data: &[u8]) { } } } - diff --git a/jolt-eval/src/invariant/synthesis/mod.rs b/jolt-eval/src/invariant/synthesis/mod.rs index 6916162d7..fc9c0e601 100644 --- a/jolt-eval/src/invariant/synthesis/mod.rs +++ b/jolt-eval/src/invariant/synthesis/mod.rs @@ -1,81 +1,3 @@ pub mod fuzz; pub mod redteam; pub mod test; - -use std::sync::Arc; - -use super::{registered_invariants, DynInvariant, SynthesisTarget}; -use crate::TestCase; - -/// Registry of invariants available for synthesis. -pub struct SynthesisRegistry { - invariants: Vec>, -} - -impl SynthesisRegistry { - pub fn new() -> Self { - Self { - invariants: Vec::new(), - } - } - - /// Build a registry from all registered invariants. 
- /// - /// Pass `None` to include only invariants that don't require a guest - /// program (those with `needs_guest: false`). - pub fn from_inventory(test_case: Option>, default_inputs: Vec) -> Self { - let mut registry = Self::new(); - for entry in registered_invariants() { - if entry.needs_guest && test_case.is_none() { - continue; - } - registry.register((entry.build)(test_case.clone(), default_inputs.clone())); - } - registry - } - - pub fn register(&mut self, invariant: Box) { - self.invariants.push(invariant); - } - - pub fn invariants(&self) -> &[Box] { - &self.invariants - } - - /// Consume the registry and return the invariant list. - pub fn into_invariants(self) -> Vec> { - self.invariants - } - - /// Return invariants that include the given synthesis target. - pub fn for_target(&self, target: SynthesisTarget) -> Vec<&dyn DynInvariant> { - self.invariants - .iter() - .filter(|inv| inv.targets().contains(target)) - .map(|inv| inv.as_ref()) - .collect() - } - - pub fn names(&self) -> Vec<&str> { - self.invariants.iter().map(|inv| inv.name()).collect() - } - - pub fn names_for_target(&self, target: SynthesisTarget) -> Vec<&str> { - self.for_target(target) - .iter() - .map(|inv| inv.name()) - .collect() - } -} - -impl Default for SynthesisRegistry { - fn default() -> Self { - Self::new() - } -} - -/// Return the names of all registered invariants. -/// Does not require a `TestCase`. 
-pub fn invariant_names() -> Vec<&'static str> { - registered_invariants().map(|e| e.name).collect() -} diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index 133d6b4f6..de843c46d 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -1,7 +1,6 @@ use std::path::Path; -use super::super::{CheckJsonResult, DynInvariant, FailedAttempt, SynthesisTarget}; -use super::SynthesisRegistry; +use super::super::{FailedAttempt, Invariant, InvariantViolation}; use crate::agent::AgentHarness; /// Result of a red-team session. @@ -28,30 +27,21 @@ impl Default for RedTeamConfig { } /// Run an AI red-team session against a single invariant. -/// -/// Each iteration: -/// 1. Builds a prompt with the invariant description, a JSON example of -/// the `Input` type, and past failed attempts. -/// 2. Derives a JSON Schema for the response envelope (an object with -/// `analysis` and `counterexample` fields) and invokes the agent via -/// [`AgentHarness::invoke_structured`]. Agents that support structured -/// output (e.g. `ClaudeCodeAgent` with `--json-schema`) will guarantee -/// the response conforms; others fall back to free-form text. -/// 3. Parses the `counterexample` from the response, deserializes it into -/// the invariant's `Input` type, and runs `Invariant::check`. -/// 4. If the check fails, the counterexample is genuine — return it. -/// 5. Otherwise records the failed attempt and continues. 
-pub fn auto_redteam( - invariant: &dyn DynInvariant, +pub fn auto_redteam( + invariant: &I, config: &RedTeamConfig, agent: &dyn AgentHarness, repo_dir: &Path, ) -> RedTeamResult { let description = invariant.description(); - let input_example = invariant.input_json_example(); - let input_schema = invariant.input_json_schema(); + let input_example: Option = invariant + .seed_corpus() + .into_iter() + .next() + .and_then(|input| serde_json::to_string_pretty(&input).ok()); + let input_schema = serde_json::to_value(schemars::schema_for!(I::Input)).unwrap(); let envelope_schema = build_envelope_schema(&input_schema); - let setup = invariant.dyn_setup(); + let setup = invariant.setup(); let mut failed_attempts = Vec::new(); for iteration in 0..config.num_iterations { @@ -77,39 +67,24 @@ pub fn auto_redteam( } }; - // Parse the structured response envelope. - // Agents using --json-schema return validated JSON directly. - // The fallback path (default invoke_structured) returns free-form - // text, so we try structured parsing first, then extract_json. 
let (analysis, counterexample_json) = match parse_envelope(&response.text) { Some(pair) => pair, - None => { - // Fallback: try to find raw JSON in free-form text - match super::super::extract_json(&response.text) { - Some(json) => (response.text.clone(), json), - None => { - failed_attempts.push(FailedAttempt { - description: format!("Iteration {}", iteration + 1), - approach: response.text, - failure_reason: "Agent response did not contain a JSON counterexample" - .to_string(), - }); - continue; - } + None => match super::super::extract_json(&response.text) { + Some(json) => (response.text.clone(), json), + None => { + failed_attempts.push(FailedAttempt { + description: format!("Iteration {}", iteration + 1), + approach: response.text, + failure_reason: "Agent response did not contain a JSON counterexample" + .to_string(), + }); + continue; } - } + }, }; - match invariant.check_json_input(&*setup, &counterexample_json) { - CheckJsonResult::Violation(violation) => { - tracing::info!("Counterexample CONFIRMED: {violation}"); - return RedTeamResult::Violation { - approach: analysis, - input_json: counterexample_json, - error: violation.to_string(), - }; - } - CheckJsonResult::Pass => { + match check_counterexample(invariant, &setup, &counterexample_json) { + Ok(()) => { failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), approach: analysis, @@ -118,7 +93,15 @@ pub fn auto_redteam( ), }); } - CheckJsonResult::BadInput(parse_err) => { + Err(CheckError::Violation(violation)) => { + tracing::info!("Counterexample CONFIRMED: {violation}"); + return RedTeamResult::Violation { + approach: analysis, + input_json: counterexample_json, + error: violation.to_string(), + }; + } + Err(CheckError::BadInput(parse_err)) => { tracing::info!("Agent produced unparseable input: {parse_err}"); failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), @@ -136,8 +119,21 @@ pub fn auto_redteam( } } -/// Build the JSON 
Schema for the structured response envelope. -/// The agent's response must be `{"analysis": "", "counterexample": }`. +enum CheckError { + Violation(InvariantViolation), + BadInput(String), +} + +fn check_counterexample( + inv: &I, + setup: &I::Setup, + json: &str, +) -> Result<(), CheckError> { + let input: I::Input = + serde_json::from_str(json).map_err(|e| CheckError::BadInput(e.to_string()))?; + inv.check(setup, input).map_err(CheckError::Violation) +} + fn build_envelope_schema(input_schema: &serde_json::Value) -> serde_json::Value { serde_json::json!({ "type": "object", @@ -152,8 +148,6 @@ fn build_envelope_schema(input_schema: &serde_json::Value) -> serde_json::Value }) } -/// Try to parse the response as a structured `{"analysis", "counterexample"}` envelope. -/// Returns `(analysis, counterexample_json)` on success. fn parse_envelope(text: &str) -> Option<(String, String)> { let val: serde_json::Value = serde_json::from_str(text).ok()?; let analysis = val.get("analysis")?.as_str()?.to_string(); @@ -223,8 +217,3 @@ fn build_redteam_prompt( prompt } - -/// List all invariants suitable for red-team testing. -pub fn redteamable_invariants(registry: &SynthesisRegistry) -> Vec<&dyn DynInvariant> { - registry.for_target(SynthesisTarget::RedTeam) -} diff --git a/jolt-eval/src/invariant/synthesis/test.rs b/jolt-eval/src/invariant/synthesis/test.rs index bbee8c3a3..f0bbf4294 100644 --- a/jolt-eval/src/invariant/synthesis/test.rs +++ b/jolt-eval/src/invariant/synthesis/test.rs @@ -1,26 +1,18 @@ -use super::super::{InvariantReport, SynthesisTarget}; -use super::SynthesisRegistry; +use super::super::{InvariantReport, JoltInvariants, SynthesisTarget}; -/// Run all invariants registered for the `Test` synthesis target. -/// -/// Runs each invariant's seed corpus, then `num_random` randomly-generated -/// inputs per invariant. 
-pub fn run_test_suite(registry: &SynthesisRegistry, num_random: usize) -> Vec { - let test_invariants = registry.for_target(SynthesisTarget::Test); - let mut reports = Vec::new(); - - for inv in test_invariants { - let results = inv.run_checks(num_random); - reports.push(InvariantReport::from_results(inv.name(), &results)); - } - - reports +/// Run all invariants that include the `Test` synthesis target. +pub fn run_test_suite(invariants: &[JoltInvariants], num_random: usize) -> Vec { + invariants + .iter() + .filter(|inv| inv.targets().contains(SynthesisTarget::Test)) + .map(|inv| { + let results = inv.run_checks(num_random); + InvariantReport::from_results(inv.name(), &results) + }) + .collect() } /// Generate `#[test]` function source code for a named invariant. -/// -/// Produces a test module that creates the invariant, runs its seed corpus, -/// and optionally runs a configurable number of random inputs. pub fn generate_test_source(invariant_name: &str, struct_path: &str) -> String { format!( r#"#[cfg(test)] diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index 39688ca8a..5ff099595 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -14,7 +14,7 @@ use jolt_core::curve::Bn254Curve; use jolt_core::poly::commitment::dory::DoryCommitmentScheme; use jolt_core::transcripts::Blake2bTranscript; -pub use invariant::{DynInvariant, Invariant, InvariantViolation, SynthesisTarget}; +pub use invariant::{Invariant, InvariantViolation, JoltInvariants, SynthesisTarget}; pub use objective::{AbstractObjective, Direction, MeasurementError, Objective}; // Re-exports used by the #[invariant] proc macro generated code. 
@@ -122,9 +122,9 @@ pub fn deserialize_proof(bytes: &[u8]) -> Result], + invariants: &[JoltInvariants], num_random: usize, ) -> HashMap>> { invariants diff --git a/jolt-eval/tests/integration.rs b/jolt-eval/tests/integration.rs index 592eb8ab0..8a56a9ce5 100644 --- a/jolt-eval/tests/integration.rs +++ b/jolt-eval/tests/integration.rs @@ -1,11 +1,12 @@ -use jolt_eval::invariant::synthesis::SynthesisRegistry; -use jolt_eval::invariant::{DynInvariant, InvariantReport, InvariantViolation, SynthesisTarget}; +use jolt_eval::invariant::{ + Invariant, InvariantReport, InvariantViolation, JoltInvariants, SynthesisTarget, +}; use jolt_eval::objective::{AbstractObjective, Direction, MeasurementError}; /// A trivial invariant for testing the framework itself. struct TrivialInvariant; -impl jolt_eval::Invariant for TrivialInvariant { +impl Invariant for TrivialInvariant { type Setup = (); type Input = u8; @@ -35,7 +36,7 @@ impl jolt_eval::Invariant for TrivialInvariant { /// An invariant that always fails, for testing violation reporting. 
struct FailingInvariant; -impl jolt_eval::Invariant for FailingInvariant { +impl Invariant for FailingInvariant { type Setup = (); type Input = u8; @@ -86,18 +87,17 @@ impl AbstractObjective for ConstantObjective { #[test] fn test_trivial_invariant_passes() { let inv = TrivialInvariant; - let results = inv.run_checks(5); - // 3 seed corpus + 5 random - assert!(results.len() >= 3); - assert!(results.iter().all(|r| r.is_ok())); + for input in inv.seed_corpus() { + inv.check(&(), input).unwrap(); + } } #[test] fn test_failing_invariant_reports_violations() { let inv = FailingInvariant; - let results = inv.run_checks(0); - assert_eq!(results.len(), 1); // 1 seed corpus item - assert!(results[0].is_err()); + for input in inv.seed_corpus() { + assert!(inv.check(&(), input).is_err()); + } } #[test] @@ -112,14 +112,12 @@ fn test_invariant_report() { } #[test] -fn test_synthesis_registry() { - let mut registry = SynthesisRegistry::new(); - registry.register(Box::new(TrivialInvariant)); - registry.register(Box::new(FailingInvariant)); - - assert_eq!(registry.invariants().len(), 2); - assert_eq!(registry.for_target(SynthesisTarget::Test).len(), 2); - assert_eq!(registry.for_target(SynthesisTarget::Fuzz).len(), 0); +fn test_jolt_invariants_all() { + let all = JoltInvariants::all(); + assert_eq!(all.len(), 2); + let names: Vec<_> = all.iter().map(|inv| inv.name()).collect(); + assert!(names.contains(&"split_eq_bind_low_high")); + assert!(names.contains(&"split_eq_bind_high_low")); } #[test] From ccbb25684f6928832ded1b0240e314e1a51524ad Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 14:35:09 -0400 Subject: [PATCH 22/86] cleanup(jolt-eval): remove check-invariants binary, simplify invariant macro Replace the check-invariants binary with macro-generated #[test] functions. The #[invariant] macro now unconditionally generates seed_corpus and random_inputs tests, with iteration count configurable via JOLT_RANDOM_ITERS env var (default 10). 
Removed the unused redteam description generation from the macro. Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/Cargo.toml | 4 - jolt-eval/bin/check_invariants.rs | 72 ---------- jolt-eval/macros/src/lib.rs | 144 +++++++------------ jolt-eval/src/invariant/split_eq_bind.rs | 4 + jolt-eval/src/invariant/synthesis/redteam.rs | 7 +- jolt-eval/src/lib.rs | 3 + jolt-eval/tests/agent_test.rs | 2 +- jolt-eval/tests/macro_test.rs | 73 +--------- 8 files changed, 67 insertions(+), 242 deletions(-) delete mode 100644 jolt-eval/bin/check_invariants.rs diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index 23821ddeb..2cbdbc6d2 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -30,10 +30,6 @@ tempfile = "3" jolt-eval-macros = { path = "macros" } -[[bin]] -name = "check-invariants" -path = "bin/check_invariants.rs" - [[bin]] name = "measure-objectives" path = "bin/measure_objectives.rs" diff --git a/jolt-eval/bin/check_invariants.rs b/jolt-eval/bin/check_invariants.rs deleted file mode 100644 index 3840c87fd..000000000 --- a/jolt-eval/bin/check_invariants.rs +++ /dev/null @@ -1,72 +0,0 @@ -use clap::Parser; -use tracing::info; - -use jolt_eval::invariant::{InvariantReport, JoltInvariants}; - -#[derive(Parser)] -#[command(name = "check-invariants")] -#[command(about = "Run Jolt invariant checks")] -struct Cli { - /// Only run the named invariant (default: all) - #[arg(long)] - invariant: Option, - - /// Number of random inputs per invariant - #[arg(long, default_value = "10")] - num_random: usize, -} - -fn main() -> eyre::Result<()> { - tracing_subscriber::fmt::init(); - let cli = Cli::parse(); - - let all = JoltInvariants::all(); - let invariants: Vec<_> = if let Some(name) = &cli.invariant { - let filtered: Vec<_> = all - .into_iter() - .filter(|inv| inv.name().contains(name.as_str())) - .collect(); - if filtered.is_empty() { - let all_inv = JoltInvariants::all(); - let names: Vec<_> = all_inv.iter().map(|i| i.name()).collect(); - eprintln!( - 
"Invariant '{name}' not found. Available: {}", - names.join(", ") - ); - std::process::exit(1); - } - filtered - } else { - all - }; - - let mut all_passed = true; - for inv in &invariants { - info!("Running invariant: {}", inv.name()); - let results = inv.run_checks(cli.num_random); - let report = InvariantReport::from_results(inv.name(), &results); - print_report(&report); - if report.failed > 0 { - all_passed = false; - } - } - - if all_passed { - info!("All invariants passed."); - } else { - eprintln!("Some invariants FAILED."); - std::process::exit(1); - } - - Ok(()) -} - -fn print_report(report: &InvariantReport) { - println!( - " {} — {}/{} passed", - report.name, report.passed, report.total - ); - for violation in &report.violations { - println!(" FAIL: {violation}"); - } -} diff --git a/jolt-eval/macros/src/lib.rs b/jolt-eval/macros/src/lib.rs index 55220604d..2df30486b 100644 --- a/jolt-eval/macros/src/lib.rs +++ b/jolt-eval/macros/src/lib.rs @@ -6,107 +6,79 @@ use syn::{parse_macro_input, DeriveInput, Ident}; /// Attribute macro for invariant structs. /// -/// Generates test harness and red-team description functions based on -/// the specified targets. +/// Generates a `#[cfg(test)]` module with two tests: +/// - `seed_corpus`: runs all seed corpus inputs +/// - `random_inputs`: runs randomly-generated inputs via `Arbitrary` +/// +/// The number of random iterations defaults to 10 and can be overridden +/// with the `JOLT_RANDOM_ITERS` environment variable. +/// +/// The struct must implement `Invariant + Default`. /// /// # Usage /// /// ```ignore -/// #[jolt_eval_macros::invariant(targets = [Test, RedTeam])] +/// #[jolt_eval_macros::invariant] /// #[derive(Default)] /// pub struct MySoundnessInvariant { ... 
} /// ``` -/// -/// Generates: -/// - For `Test`: A `#[cfg(test)]` module with seed corpus and random tests -/// - For `RedTeam`: A `redteam_description` function returning the invariant's description -/// -/// For `Fuzz`, use the `fuzz_invariant()` library function in a -/// `fuzz/fuzz_targets/` binary instead — see the fuzz directory. -/// -/// The struct must implement `Invariant + Default`. #[proc_macro_attribute] -pub fn invariant(attr: TokenStream, item: TokenStream) -> TokenStream { +pub fn invariant(_attr: TokenStream, item: TokenStream) -> TokenStream { let input = parse_macro_input!(item as DeriveInput); let struct_name = &input.ident; let snake_name = to_snake_case(&struct_name.to_string()); let test_mod_name = Ident::new(&format!("{snake_name}_synthesized"), struct_name.span()); - let targets = parse_targets(attr); - let has_test = targets.contains(&"Test".to_string()); - let has_redteam = targets.contains(&"RedTeam".to_string()); + let expanded = quote! { + #input + + #[cfg(test)] + mod #test_mod_name { + use super::*; + use jolt_eval::Invariant; - let test_block = if has_test { - quote! 
{ - #[cfg(test)] - mod #test_mod_name { - use super::*; - use jolt_eval::Invariant; + #[test] + fn seed_corpus() { + let invariant = #struct_name::default(); + let setup = invariant.setup(); + for (i, input) in invariant.seed_corpus().into_iter().enumerate() { + invariant.check(&setup, input).unwrap_or_else(|e| { + panic!( + "Invariant '{}' violated on seed {}: {}", + invariant.name(), i, e + ); + }); + } + } - #[test] - fn seed_corpus() { - let invariant = #struct_name::default(); - let setup = invariant.setup(); - for (i, input) in invariant.seed_corpus().into_iter().enumerate() { + #[test] + fn random_inputs() { + use jolt_eval::rand::RngCore; + let num_iters: usize = std::env::var("JOLT_RANDOM_ITERS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(10); + let invariant = #struct_name::default(); + let setup = invariant.setup(); + let mut rng = jolt_eval::rand::thread_rng(); + for _ in 0..num_iters { + let mut raw = vec![0u8; 4096]; + rng.fill_bytes(&mut raw); + let mut u = jolt_eval::arbitrary::Unstructured::new(&raw); + if let Ok(input) = < + <#struct_name as jolt_eval::Invariant>::Input + as jolt_eval::arbitrary::Arbitrary + >::arbitrary(&mut u) { invariant.check(&setup, input).unwrap_or_else(|e| { panic!( - "Invariant '{}' violated on seed {}: {}", - invariant.name(), i, e + "Invariant '{}' violated: {}", + invariant.name(), e ); }); } } - - #[test] - fn random_inputs() { - use jolt_eval::rand::RngCore; - let invariant = #struct_name::default(); - let setup = invariant.setup(); - let mut rng = jolt_eval::rand::thread_rng(); - for _ in 0..10 { - let mut raw = vec![0u8; 4096]; - rng.fill_bytes(&mut raw); - let mut u = jolt_eval::arbitrary::Unstructured::new(&raw); - if let Ok(input) = < - <#struct_name as jolt_eval::Invariant>::Input - as jolt_eval::arbitrary::Arbitrary - >::arbitrary(&mut u) { - invariant.check(&setup, input).unwrap_or_else(|e| { - panic!( - "Invariant '{}' violated: {}", - invariant.name(), e - ); - }); - } - } - } - } - } - } else { - 
quote! {} - }; - - let redteam_fn_name = Ident::new( - &format!("{snake_name}_redteam_description"), - struct_name.span(), - ); - let redteam_block = if has_redteam { - quote! { - pub fn #redteam_fn_name() -> String { - use jolt_eval::Invariant; - let invariant = #struct_name::default(); - invariant.description() } } - } else { - quote! {} - }; - - let expanded = quote! { - #input - - #test_block - #redteam_block }; expanded.into() @@ -126,19 +98,3 @@ fn to_snake_case(s: &str) -> String { } result } - -fn parse_targets(attr: TokenStream) -> Vec { - let attr_str = attr.to_string(); - // Parse: targets = [Test, Fuzz, RedTeam] - if let Some(bracket_start) = attr_str.find('[') { - if let Some(bracket_end) = attr_str.find(']') { - let inner = &attr_str[bracket_start + 1..bracket_end]; - return inner - .split(',') - .map(|s| s.trim().to_string()) - .filter(|s| !s.is_empty()) - .collect(); - } - } - vec![] -} diff --git a/jolt-eval/src/invariant/split_eq_bind.rs b/jolt-eval/src/invariant/split_eq_bind.rs index 087bf5e80..fbcdbe1cc 100644 --- a/jolt-eval/src/invariant/split_eq_bind.rs +++ b/jolt-eval/src/invariant/split_eq_bind.rs @@ -34,6 +34,8 @@ fn challenges_from_seed(seed: &[u8; 32], count: usize) -> Vec { // ── LowToHigh ──────────────────────────────────────────────────────── +#[jolt_eval_macros::invariant] +#[derive(Default)] pub struct SplitEqBindLowHighInvariant; impl Invariant for SplitEqBindLowHighInvariant { @@ -108,6 +110,8 @@ impl Invariant for SplitEqBindLowHighInvariant { // ── HighToLow ──────────────────────────────────────────────────────── +#[jolt_eval_macros::invariant] +#[derive(Default)] pub struct SplitEqBindHighLowInvariant; impl Invariant for SplitEqBindHighLowInvariant { diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index de843c46d..d12c0e477 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -75,8 +75,7 @@ pub fn auto_redteam( 
failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), approach: response.text, - failure_reason: "Agent response did not contain a JSON counterexample" - .to_string(), + failure_reason: "Agent response did not contain valid JSON".to_string(), }); continue; } @@ -107,7 +106,7 @@ pub fn auto_redteam( description: format!("Iteration {}", iteration + 1), approach: analysis, failure_reason: format!( - "Could not deserialize agent JSON into Input type: {parse_err}" + "Could not deserialize response JSON into Input type: {parse_err}" ), }); } @@ -197,7 +196,7 @@ fn build_redteam_prompt( prompt.push_str("## Previous failed attempts\n\n"); prompt.push_str( "The following approaches have already been tried and did NOT produce a \ - valid counterexample. Try a fundamentally different approach.\n\n", + valid counterexample.\n\n", ); for attempt in failed_attempts { prompt.push_str(&format!( diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index 5ff099595..7140ba788 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -1,5 +1,8 @@ #![allow(non_snake_case)] +// Allow `jolt_eval::` paths in macro-generated code within this crate. 
+extern crate self as jolt_eval; + pub mod agent; pub mod guests; pub mod invariant; diff --git a/jolt-eval/tests/agent_test.rs b/jolt-eval/tests/agent_test.rs index 714733454..fc6a641e7 100644 --- a/jolt-eval/tests/agent_test.rs +++ b/jolt-eval/tests/agent_test.rs @@ -332,7 +332,7 @@ fn redteam_handles_no_json_in_response() { assert_eq!(attempts.len(), 1); assert!(attempts[0] .failure_reason - .contains("did not contain a JSON")); + .contains("did not contain valid JSON")); } _ => panic!("Expected NoViolation"), } diff --git a/jolt-eval/tests/macro_test.rs b/jolt-eval/tests/macro_test.rs index a91217e56..91e7dca8b 100644 --- a/jolt-eval/tests/macro_test.rs +++ b/jolt-eval/tests/macro_test.rs @@ -2,10 +2,10 @@ use enumset::EnumSet; use jolt_eval::invariant::{Invariant, InvariantViolation, SynthesisTarget}; // --------------------------------------------------------------------------- -// AlwaysPass: exercises Test + RedTeam synthesis targets +// AlwaysPass: trivial invariant to test macro synthesis // --------------------------------------------------------------------------- -#[jolt_eval_macros::invariant(targets = [Test, RedTeam])] +#[jolt_eval_macros::invariant] #[derive(Default)] pub struct AlwaysPassInvariant; @@ -32,7 +32,7 @@ impl Invariant for AlwaysPassInvariant { } // --------------------------------------------------------------------------- -// BoundsCheck: Test only, uses a struct Input type +// BoundsCheck: uses a struct Input type // --------------------------------------------------------------------------- #[derive( @@ -48,7 +48,7 @@ pub struct RangeInput { pub hi: u32, } -#[jolt_eval_macros::invariant(targets = [Test])] +#[jolt_eval_macros::invariant] #[derive(Default)] pub struct BoundsCheckInvariant; @@ -91,68 +91,7 @@ impl Invariant for BoundsCheckInvariant { } } -// --------------------------------------------------------------------------- -// RedTeamOnly: only the RedTeam target -// 
--------------------------------------------------------------------------- - -#[jolt_eval_macros::invariant(targets = [RedTeam])] -#[derive(Default)] -pub struct RedTeamOnlyInvariant; - -impl Invariant for RedTeamOnlyInvariant { - type Setup = String; - type Input = u16; - - fn name(&self) -> &str { - "redteam_only" - } - fn description(&self) -> String { - "An invariant that only generates a red-team description.".to_string() - } - fn targets(&self) -> EnumSet { - SynthesisTarget::RedTeam.into() - } - fn setup(&self) -> String { - "setup_value".to_string() - } - fn check(&self, setup: &String, _input: u16) -> Result<(), InvariantViolation> { - if setup.is_empty() { - Err(InvariantViolation::new("empty setup")) - } else { - Ok(()) - } - } - fn seed_corpus(&self) -> Vec { - vec![0, 1000, u16::MAX] - } -} - -// =========================================================================== -// Tests that verify the macro-generated functions exist and work correctly // =========================================================================== - -// --- Red-team description functions --- - -#[test] -fn redteam_always_pass_description() { - let desc = always_pass_invariant_redteam_description(); - assert!( - desc.contains("always passes"), - "Expected description to mention 'always passes', got: {desc}" - ); -} - -#[test] -fn redteam_only_description() { - let desc = red_team_only_invariant_redteam_description(); - assert!( - desc.contains("red-team description"), - "Expected description to mention 'red-team description', got: {desc}" - ); -} - -// --- Synthesized test modules are auto-discovered by nextest --- // The #[test] functions `seed_corpus` and `random_inputs` inside the -// generated `*_synthesized` modules are run automatically. We verify -// their presence indirectly: if `cargo nextest run` reports them, the -// macro is working. +// generated `*_synthesized` modules are auto-discovered by nextest. 
+// =========================================================================== From 49812d023613c6fb88e9baa92d419d5379c01a22 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 15:32:19 -0400 Subject: [PATCH 23/86] refactor(jolt-eval): simplify fuzz targets and split-eq input representation Replace fuzz_invariant() function with a fuzz_invariant!() declarative macro that generates the full fuzz_target! expansion with OnceLock-based setup caching. Each fuzz target is now 3 lines. Change SplitEqBindInput to store w/rs challenge vectors directly as Vec with a manual Arbitrary impl, removing the seed-based indirection. Remove now-dead type-erasure methods (dyn_setup, check_json_input, check_arbitrary_input, CheckJsonResult) from JoltInvariants. Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 1 - jolt-eval/Cargo.toml | 1 - jolt-eval/fuzz/Cargo.lock | 1 - .../fuzz_targets/split_eq_bind_high_low.rs | 6 +- .../fuzz_targets/split_eq_bind_low_high.rs | 6 +- jolt-eval/src/invariant/mod.rs | 40 --------- jolt-eval/src/invariant/split_eq_bind.rs | 86 +++++++++++-------- jolt-eval/src/invariant/synthesis/fuzz.rs | 82 +++++++++--------- jolt-eval/src/invariant/synthesis/mod.rs | 1 - jolt-eval/src/invariant/synthesis/test.rs | 54 ------------ 10 files changed, 95 insertions(+), 183 deletions(-) delete mode 100644 jolt-eval/src/invariant/synthesis/test.rs diff --git a/Cargo.lock b/Cargo.lock index 6864257c1..7bbdb1ff9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2955,7 +2955,6 @@ dependencies = [ "jolt-eval-macros", "postcard", "rand 0.8.5", - "rand_chacha 0.3.1", "rayon", "schemars 0.8.22", "serde", diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index 2cbdbc6d2..e4ee0539a 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -19,7 +19,6 @@ tracing = { workspace = true } clap = { workspace = true, features = ["derive"] } rayon = { workspace = true } rand = { workspace = true } -rand_chacha = { workspace = true } sysinfo = { workspace 
= true } tracing-subscriber = { workspace = true } diff --git a/jolt-eval/fuzz/Cargo.lock b/jolt-eval/fuzz/Cargo.lock index 0b70f7d61..64582eecf 100644 --- a/jolt-eval/fuzz/Cargo.lock +++ b/jolt-eval/fuzz/Cargo.lock @@ -1021,7 +1021,6 @@ dependencies = [ "jolt-eval-macros", "postcard", "rand", - "rand_chacha", "rayon", "schemars", "serde", diff --git a/jolt-eval/fuzz/fuzz_targets/split_eq_bind_high_low.rs b/jolt-eval/fuzz/fuzz_targets/split_eq_bind_high_low.rs index 0dad467d6..dda196175 100644 --- a/jolt-eval/fuzz/fuzz_targets/split_eq_bind_high_low.rs +++ b/jolt-eval/fuzz/fuzz_targets/split_eq_bind_high_low.rs @@ -1,5 +1,3 @@ #![no_main] -use libfuzzer_sys::fuzz_target; -fuzz_target!(|data: &[u8]| { - jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("split_eq_bind_high_low", data); -}); +use jolt_eval::invariant::split_eq_bind::SplitEqBindHighLowInvariant; +jolt_eval::fuzz_invariant!(SplitEqBindHighLowInvariant::default()); diff --git a/jolt-eval/fuzz/fuzz_targets/split_eq_bind_low_high.rs b/jolt-eval/fuzz/fuzz_targets/split_eq_bind_low_high.rs index e55116849..7057af400 100644 --- a/jolt-eval/fuzz/fuzz_targets/split_eq_bind_low_high.rs +++ b/jolt-eval/fuzz/fuzz_targets/split_eq_bind_low_high.rs @@ -1,5 +1,3 @@ #![no_main] -use libfuzzer_sys::fuzz_target; -fuzz_target!(|data: &[u8]| { - jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("split_eq_bind_low_high", data); -}); +use jolt_eval::invariant::split_eq_bind::SplitEqBindLowHighInvariant; +jolt_eval::fuzz_invariant!(SplitEqBindLowHighInvariant::default()); diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index 2f74c79d3..ffd2c36fe 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -1,7 +1,6 @@ pub mod split_eq_bind; pub mod synthesis; -use std::any::Any; use std::fmt; use arbitrary::Arbitrary; @@ -125,13 +124,6 @@ impl JoltInvariants { dispatch!(self, |inv| run_checks_impl(inv, num_random)) } - pub fn dyn_setup(&self) -> Box { - 
dispatch!(self, |inv| dyn_setup_impl(inv)) - } - - pub fn check_json_input(&self, setup: &dyn Any, json: &str) -> CheckJsonResult { - dispatch!(self, |inv| check_json_input_impl(inv, setup, json)) - } } fn run_checks_impl(inv: &I, num_random: usize) -> Vec> { @@ -155,28 +147,6 @@ fn run_checks_impl(inv: &I, num_random: usize) -> Vec(inv: &I) -> Box { - Box::new(inv.setup()) -} - -fn check_json_input_impl( - inv: &I, - setup: &dyn Any, - json: &str, -) -> CheckJsonResult { - let setup = setup - .downcast_ref::() - .expect("check_json_input called with wrong setup type"); - let input: I::Input = match serde_json::from_str(json) { - Ok(v) => v, - Err(e) => return CheckJsonResult::BadInput(e.to_string()), - }; - match inv.check(setup, input) { - Ok(()) => CheckJsonResult::Pass, - Err(v) => CheckJsonResult::Violation(v), - } -} - /// A counterexample produced when an invariant is violated. pub struct InvariantCounterexample { pub description: String, @@ -191,16 +161,6 @@ pub struct FailedAttempt { pub failure_reason: String, } -/// Outcome of [`JoltInvariants::check_json_input`]. -pub enum CheckJsonResult { - /// The input was valid and the invariant held. - Pass, - /// The input was valid and the invariant was violated. - Violation(InvariantViolation), - /// The JSON could not be deserialized into the expected `Input` type. - BadInput(String), -} - /// Result of running an invariant check suite. 
pub struct InvariantReport { pub name: String, diff --git a/jolt-eval/src/invariant/split_eq_bind.rs b/jolt-eval/src/invariant/split_eq_bind.rs index fbcdbe1cc..fefcf8ec0 100644 --- a/jolt-eval/src/invariant/split_eq_bind.rs +++ b/jolt-eval/src/invariant/split_eq_bind.rs @@ -1,6 +1,6 @@ #![allow(non_snake_case)] -use arbitrary::Arbitrary; +use arbitrary::{Arbitrary, Unstructured}; use enumset::EnumSet; use ark_bn254::Fr; @@ -14,22 +14,32 @@ use super::{Invariant, InvariantViolation, SynthesisTarget}; type Challenge = ::Challenge; -/// Input for the split-eq bind invariants: a number of variables and a -/// seed from which we derive all challenge values deterministically. -#[derive(Debug, Clone, Arbitrary, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] +/// Input for the split-eq bind invariants. +/// +/// `w` are the initial eq-polynomial challenges, `rs` are the binding +/// round challenges. Stored as `u128` for serde/Arbitrary compatibility; +/// converted to `Challenge` via `From` in the check methods. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] pub struct SplitEqBindInput { - /// Number of variables (clamped to 2..=20 in check). - pub num_vars: u8, - /// Seed bytes used to derive challenge values via simple hashing. - pub seed: [u8; 32], + pub w: Vec, + pub rs: Vec, } -fn challenges_from_seed(seed: &[u8; 32], count: usize) -> Vec { - use rand::SeedableRng; - use rand_chacha::ChaCha8Rng; +impl<'a> Arbitrary<'a> for SplitEqBindInput { + fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { + let num_vars = u.int_in_range(2u8..=16)? 
as usize; + let w: Vec = (0..num_vars) + .map(|_| u.arbitrary()) + .collect::>()?; + let rs: Vec = (0..num_vars) + .map(|_| u.arbitrary()) + .collect::>()?; + Ok(Self { w, rs }) + } +} - let mut rng = ChaCha8Rng::from_seed(*seed); - (0..count).map(|_| Challenge::random(&mut rng)).collect() +fn to_challenges(vals: &[u128]) -> Vec { + vals.iter().copied().map(Challenge::from).collect() } // ── LowToHigh ──────────────────────────────────────────────────────── @@ -59,12 +69,15 @@ impl Invariant for SplitEqBindLowHighInvariant { fn setup(&self) {} fn check(&self, _setup: &(), input: SplitEqBindInput) -> Result<(), InvariantViolation> { - let num_vars = (input.num_vars as usize).clamp(2, 20); - let challenges = challenges_from_seed(&input.seed, 2 * num_vars); - let (w, rs) = challenges.split_at(num_vars); + if input.w.len() < 2 { + return Ok(()); + } + let w = to_challenges(&input.w); + let rs = to_challenges(&input.rs); + let num_vars = w.len(); - let mut regular_eq = DensePolynomial::::new(EqPolynomial::evals(w)); - let mut split_eq = GruenSplitEqPolynomial::::new(w, BindingOrder::LowToHigh); + let mut regular_eq = DensePolynomial::::new(EqPolynomial::evals(&w)); + let mut split_eq = GruenSplitEqPolynomial::::new(&w, BindingOrder::LowToHigh); let merged = split_eq.merge(); if regular_eq.Z[..regular_eq.len()] != merged.Z[..merged.len()] { @@ -93,16 +106,16 @@ impl Invariant for SplitEqBindLowHighInvariant { fn seed_corpus(&self) -> Vec { vec![ SplitEqBindInput { - num_vars: 2, - seed: [0u8; 32], + w: vec![0, 1], + rs: vec![2, 3], }, SplitEqBindInput { - num_vars: 10, - seed: [1u8; 32], + w: (0..10).collect(), + rs: (10..20).collect(), }, SplitEqBindInput { - num_vars: 17, - seed: [42u8; 32], + w: (0..17).map(|i| u128::MAX - i).collect(), + rs: (0..17).map(|i| i * 1000).collect(), }, ] } @@ -135,12 +148,15 @@ impl Invariant for SplitEqBindHighLowInvariant { fn setup(&self) {} fn check(&self, _setup: &(), input: SplitEqBindInput) -> Result<(), InvariantViolation> { - let 
num_vars = (input.num_vars as usize).clamp(2, 20); - let challenges = challenges_from_seed(&input.seed, 2 * num_vars); - let (w, rs) = challenges.split_at(num_vars); + if input.w.len() < 2 { + return Ok(()); + } + let w = to_challenges(&input.w); + let rs = to_challenges(&input.rs); + let num_vars = w.len(); - let mut regular_eq = DensePolynomial::::new(EqPolynomial::evals(w)); - let mut split_eq = GruenSplitEqPolynomial::::new(w, BindingOrder::HighToLow); + let mut regular_eq = DensePolynomial::::new(EqPolynomial::evals(&w)); + let mut split_eq = GruenSplitEqPolynomial::::new(&w, BindingOrder::HighToLow); let merged = split_eq.merge(); if regular_eq.Z[..regular_eq.len()] != merged.Z[..merged.len()] { @@ -169,16 +185,16 @@ impl Invariant for SplitEqBindHighLowInvariant { fn seed_corpus(&self) -> Vec { vec![ SplitEqBindInput { - num_vars: 2, - seed: [0u8; 32], + w: vec![0, 1], + rs: vec![2, 3], }, SplitEqBindInput { - num_vars: 10, - seed: [1u8; 32], + w: (0..10).collect(), + rs: (10..20).collect(), }, SplitEqBindInput { - num_vars: 17, - seed: [42u8; 32], + w: (0..16).map(|i| u128::MAX - i).collect(), + rs: (0..16).map(|i| i * 1000).collect(), }, ] } diff --git a/jolt-eval/src/invariant/synthesis/fuzz.rs b/jolt-eval/src/invariant/synthesis/fuzz.rs index 329594f9b..662559b04 100644 --- a/jolt-eval/src/invariant/synthesis/fuzz.rs +++ b/jolt-eval/src/invariant/synthesis/fuzz.rs @@ -1,52 +1,50 @@ -use std::any::Any; -use std::sync::LazyLock; - -use super::super::{CheckJsonResult, JoltInvariants}; - -struct CachedInvariant { - inv: JoltInvariants, - setup: Box, -} - -static CACHE: LazyLock> = LazyLock::new(|| { - JoltInvariants::all() - .into_iter() - .map(|inv| { - let setup = inv.dyn_setup(); - CachedInvariant { inv, setup } - }) - .collect() -}); - -/// Fuzz a named invariant with raw byte data from libfuzzer. +/// Macro that generates a libfuzzer fuzz target for an invariant. 
/// -/// Panics on invariant violation (which is what libfuzzer needs to -/// detect a finding). +/// Takes a concrete invariant expression. Setup is performed once; +/// each fuzz iteration produces an `Input` via `Arbitrary` and checks it. /// -/// # Usage in a fuzz target +/// # Usage /// /// ```ignore /// #![no_main] -/// use libfuzzer_sys::fuzz_target; -/// fuzz_target!(|data: &[u8]| { -/// jolt_eval::invariant::synthesis::fuzz::fuzz_invariant("split_eq_bind_low_high", data); -/// }); +/// use jolt_eval::invariant::split_eq_bind::SplitEqBindLowHighInvariant; +/// jolt_eval::fuzz_invariant!(SplitEqBindLowHighInvariant::default()); /// ``` -pub fn fuzz_invariant(invariant_name: &str, data: &[u8]) { - let cached = CACHE - .iter() - .find(|c| c.inv.name() == invariant_name) - .unwrap_or_else(|| panic!("Invariant '{invariant_name}' not found")); +#[macro_export] +macro_rules! fuzz_invariant { + ($inv:expr) => { + use $crate::Invariant as _; - if let Ok(json_str) = std::str::from_utf8(data) { - match cached.inv.check_json_input(&*cached.setup, json_str) { - CheckJsonResult::Violation(e) => { - panic!( - "Invariant '{}' violated: {e}\nInput JSON: {json_str}", - cached.inv.name() - ); + static __FUZZ_SETUP: ::std::sync::OnceLock< + ::std::boxed::Box, + > = ::std::sync::OnceLock::new(); + + fn __fuzz_init(inv: &I) { + __FUZZ_SETUP + .set(::std::boxed::Box::new(inv.setup())) + .ok(); + } + + fn __fuzz_check(inv: &I, data: &[u8]) { + let setup = __FUZZ_SETUP + .get() + .expect("SETUP not initialized") + .downcast_ref::() + .expect("wrong setup type"); + let mut u = $crate::arbitrary::Unstructured::new(data); + if let Ok(input) = ::arbitrary(&mut u) { + inv.check(setup, input) + .unwrap_or_else(|e| panic!("Invariant violated: {e}")); } - CheckJsonResult::Pass | CheckJsonResult::BadInput(_) => {} } - } + + ::libfuzzer_sys::fuzz_target!( + init: { + __fuzz_init(&$inv); + }, + |data: &[u8]| { + __fuzz_check(&$inv, data); + } + ); + }; } diff --git 
a/jolt-eval/src/invariant/synthesis/mod.rs b/jolt-eval/src/invariant/synthesis/mod.rs index fc9c0e601..7b79988f8 100644 --- a/jolt-eval/src/invariant/synthesis/mod.rs +++ b/jolt-eval/src/invariant/synthesis/mod.rs @@ -1,3 +1,2 @@ pub mod fuzz; pub mod redteam; -pub mod test; diff --git a/jolt-eval/src/invariant/synthesis/test.rs b/jolt-eval/src/invariant/synthesis/test.rs deleted file mode 100644 index f0bbf4294..000000000 --- a/jolt-eval/src/invariant/synthesis/test.rs +++ /dev/null @@ -1,54 +0,0 @@ -use super::super::{InvariantReport, JoltInvariants, SynthesisTarget}; - -/// Run all invariants that include the `Test` synthesis target. -pub fn run_test_suite(invariants: &[JoltInvariants], num_random: usize) -> Vec { - invariants - .iter() - .filter(|inv| inv.targets().contains(SynthesisTarget::Test)) - .map(|inv| { - let results = inv.run_checks(num_random); - InvariantReport::from_results(inv.name(), &results) - }) - .collect() -} - -/// Generate `#[test]` function source code for a named invariant. 
-pub fn generate_test_source(invariant_name: &str, struct_path: &str) -> String { - format!( - r#"#[cfg(test)] -mod {invariant_name}_tests {{ - use super::*; - use jolt_eval::Invariant; - - #[test] - fn test_{invariant_name}_seed_corpus() {{ - let invariant = {struct_path}::default(); - let setup = invariant.setup(); - for (i, input) in invariant.seed_corpus().into_iter().enumerate() {{ - invariant.check(&setup, input).unwrap_or_else(|e| {{ - panic!("Invariant '{{}}' violated on seed {{}}: {{}}", invariant.name(), i, e); - }}); - }} - }} - - #[test] - fn test_{invariant_name}_random() {{ - use rand::RngCore; - let invariant = {struct_path}::default(); - let setup = invariant.setup(); - let mut rng = rand::thread_rng(); - for _ in 0..10 {{ - let mut raw = vec![0u8; 4096]; - rng.fill_bytes(&mut raw); - let mut u = arbitrary::Unstructured::new(&raw); - if let Ok(input) = <_ as arbitrary::Arbitrary>::arbitrary(&mut u) {{ - invariant.check(&setup, input).unwrap_or_else(|e| {{ - panic!("Invariant '{{}}' violated: {{}}", invariant.name(), e); - }}); - }} - }} - }} -}} -"# - ) -} From bb5c17e322a9e9ea6c4e8f0acb225c41d3bad51a Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 16:02:48 -0400 Subject: [PATCH 24/86] refactor(jolt-eval): split targets() into InvariantTargets trait, macro-driven Move Invariant::targets() to a separate InvariantTargets trait with a default empty-set implementation. The #[invariant] macro now accepts target lists (e.g. #[invariant(Test, Fuzz)]) and generates the InvariantTargets impl and conditionally emits #[test] blocks only when Test is listed. The fuzz_invariant! macro asserts that the invariant includes SynthesisTarget::Fuzz at init time. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/macros/src/lib.rs | 136 ++++++++++++++-------- jolt-eval/src/invariant/mod.rs | 61 +++------- jolt-eval/src/invariant/split_eq_bind.rs | 15 +-- jolt-eval/src/invariant/synthesis/fuzz.rs | 11 ++ jolt-eval/src/lib.rs | 4 +- jolt-eval/tests/agent_test.rs | 26 +++-- jolt-eval/tests/integration.rs | 25 +--- jolt-eval/tests/macro_test.rs | 13 +-- 8 files changed, 144 insertions(+), 147 deletions(-) diff --git a/jolt-eval/macros/src/lib.rs b/jolt-eval/macros/src/lib.rs index 2df30486b..5fa573131 100644 --- a/jolt-eval/macros/src/lib.rs +++ b/jolt-eval/macros/src/lib.rs @@ -6,79 +6,115 @@ use syn::{parse_macro_input, DeriveInput, Ident}; /// Attribute macro for invariant structs. /// -/// Generates a `#[cfg(test)]` module with two tests: -/// - `seed_corpus`: runs all seed corpus inputs -/// - `random_inputs`: runs randomly-generated inputs via `Arbitrary` +/// Generates an `InvariantTargets` implementation from the listed targets, +/// and optionally a `#[cfg(test)]` module with tests if `Test` is included. /// -/// The number of random iterations defaults to 10 and can be overridden -/// with the `JOLT_RANDOM_ITERS` environment variable. +/// The number of random test iterations defaults to 10 and can be +/// overridden with the `JOLT_RANDOM_ITERS` environment variable. /// /// The struct must implement `Invariant + Default`. /// /// # Usage /// /// ```ignore -/// #[jolt_eval_macros::invariant] +/// #[jolt_eval_macros::invariant(Test, Fuzz)] /// #[derive(Default)] -/// pub struct MySoundnessInvariant { ... 
} +/// pub struct MyInvariant; /// ``` #[proc_macro_attribute] -pub fn invariant(_attr: TokenStream, item: TokenStream) -> TokenStream { +pub fn invariant(attr: TokenStream, item: TokenStream) -> TokenStream { let input = parse_macro_input!(item as DeriveInput); let struct_name = &input.ident; let snake_name = to_snake_case(&struct_name.to_string()); let test_mod_name = Ident::new(&format!("{snake_name}_synthesized"), struct_name.span()); - let expanded = quote! { - #input + let targets = parse_targets(attr); + let has_test = targets.contains(&"Test".to_string()); - #[cfg(test)] - mod #test_mod_name { - use super::*; - use jolt_eval::Invariant; + // Build the EnumSet expression for InvariantTargets::targets() + let target_exprs: Vec = targets + .iter() + .map(|t| { + let ident = Ident::new(t, proc_macro2::Span::call_site()); + quote! { jolt_eval::SynthesisTarget::#ident } + }) + .collect(); - #[test] - fn seed_corpus() { - let invariant = #struct_name::default(); - let setup = invariant.setup(); - for (i, input) in invariant.seed_corpus().into_iter().enumerate() { - invariant.check(&setup, input).unwrap_or_else(|e| { - panic!( - "Invariant '{}' violated on seed {}: {}", - invariant.name(), i, e - ); - }); - } + let targets_body = if target_exprs.is_empty() { + quote! { enumset::EnumSet::empty() } + } else { + let first = &target_exprs[0]; + let rest = &target_exprs[1..]; + quote! { #first #(| #rest)* } + }; + + let targets_impl = quote! 
{ + impl jolt_eval::InvariantTargets for #struct_name { + fn targets(&self) -> enumset::EnumSet { + #targets_body } + } + }; - #[test] - fn random_inputs() { - use jolt_eval::rand::RngCore; - let num_iters: usize = std::env::var("JOLT_RANDOM_ITERS") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(10); - let invariant = #struct_name::default(); - let setup = invariant.setup(); - let mut rng = jolt_eval::rand::thread_rng(); - for _ in 0..num_iters { - let mut raw = vec![0u8; 4096]; - rng.fill_bytes(&mut raw); - let mut u = jolt_eval::arbitrary::Unstructured::new(&raw); - if let Ok(input) = < - <#struct_name as jolt_eval::Invariant>::Input - as jolt_eval::arbitrary::Arbitrary - >::arbitrary(&mut u) { + let test_block = if has_test { + quote! { + #[cfg(test)] + mod #test_mod_name { + use super::*; + use jolt_eval::Invariant; + + #[test] + fn seed_corpus() { + let invariant = #struct_name::default(); + let setup = invariant.setup(); + for (i, input) in invariant.seed_corpus().into_iter().enumerate() { invariant.check(&setup, input).unwrap_or_else(|e| { panic!( - "Invariant '{}' violated: {}", - invariant.name(), e + "Invariant '{}' violated on seed {}: {}", + invariant.name(), i, e ); }); } } + + #[test] + fn random_inputs() { + use jolt_eval::rand::RngCore; + let num_iters: usize = std::env::var("JOLT_RANDOM_ITERS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(10); + let invariant = #struct_name::default(); + let setup = invariant.setup(); + let mut rng = jolt_eval::rand::thread_rng(); + for _ in 0..num_iters { + let mut raw = vec![0u8; 4096]; + rng.fill_bytes(&mut raw); + let mut u = jolt_eval::arbitrary::Unstructured::new(&raw); + if let Ok(input) = < + <#struct_name as jolt_eval::Invariant>::Input + as jolt_eval::arbitrary::Arbitrary + >::arbitrary(&mut u) { + invariant.check(&setup, input).unwrap_or_else(|e| { + panic!( + "Invariant '{}' violated: {}", + invariant.name(), e + ); + }); + } + } + } } } + } else { + quote! 
{} + }; + + let expanded = quote! { + #input + + #targets_impl + #test_block }; expanded.into() @@ -98,3 +134,11 @@ fn to_snake_case(s: &str) -> String { } result } + +fn parse_targets(attr: TokenStream) -> Vec { + attr.to_string() + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect() +} diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index ffd2c36fe..633872a4d 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -71,8 +71,6 @@ pub trait Invariant: Send + Sync { /// Human-readable description, also used as context for AI red-teaming. fn description(&self) -> String; - fn targets(&self) -> EnumSet; - /// One-time setup (e.g. preprocessing, generating an honest proof). fn setup(&self) -> Self::Setup; @@ -85,6 +83,16 @@ pub trait Invariant: Send + Sync { } } +/// Declares which synthesis targets an invariant supports. +/// +/// Defaults to an empty set. Use the `#[invariant(Test, Fuzz)]` macro +/// attribute to generate the implementation automatically. +pub trait InvariantTargets { + fn targets(&self) -> EnumSet { + EnumSet::empty() + } +} + /// Enum collecting all Jolt invariants. Methods dispatch via match. 
pub enum JoltInvariants { SplitEqBindLowHigh(split_eq_bind::SplitEqBindLowHighInvariant), @@ -117,16 +125,18 @@ impl JoltInvariants { } pub fn targets(&self) -> EnumSet { - dispatch!(self, |inv| inv.targets()) + dispatch!(self, |inv| InvariantTargets::targets(inv)) } pub fn run_checks(&self, num_random: usize) -> Vec> { dispatch!(self, |inv| run_checks_impl(inv, num_random)) } - } -fn run_checks_impl(inv: &I, num_random: usize) -> Vec> { +fn run_checks_impl( + inv: &I, + num_random: usize, +) -> Vec> { let setup = inv.setup(); let mut results = Vec::new(); @@ -147,13 +157,6 @@ fn run_checks_impl(inv: &I, num_random: usize) -> Vec { - pub description: String, - pub input: I::Input, - pub error: InvariantViolation, -} - /// Record of a red-team attempt that failed to find a violation. pub struct FailedAttempt { pub description: String, @@ -161,40 +164,6 @@ pub struct FailedAttempt { pub failure_reason: String, } -/// Result of running an invariant check suite. -pub struct InvariantReport { - pub name: String, - pub total: usize, - pub passed: usize, - pub failed: usize, - pub violations: Vec, -} - -impl InvariantReport { - pub fn from_results(name: &str, results: &[Result<(), InvariantViolation>]) -> Self { - let total = results.len(); - let mut passed = 0; - let mut failed = 0; - let mut violations = Vec::new(); - for r in results { - match r { - Ok(()) => passed += 1, - Err(e) => { - failed += 1; - violations.push(e.to_string()); - } - } - } - Self { - name: name.to_string(), - total, - passed, - failed, - violations, - } - } -} - /// Try to extract a JSON object from free-form text. Looks for a /// ````json` code block first, then falls back to the last `{…}` that /// parses as valid JSON. 
diff --git a/jolt-eval/src/invariant/split_eq_bind.rs b/jolt-eval/src/invariant/split_eq_bind.rs index fefcf8ec0..a33d417b9 100644 --- a/jolt-eval/src/invariant/split_eq_bind.rs +++ b/jolt-eval/src/invariant/split_eq_bind.rs @@ -1,7 +1,6 @@ #![allow(non_snake_case)] use arbitrary::{Arbitrary, Unstructured}; -use enumset::EnumSet; use ark_bn254::Fr; use jolt_core::field::JoltField; @@ -10,7 +9,7 @@ use jolt_core::poly::eq_poly::EqPolynomial; use jolt_core::poly::multilinear_polynomial::BindingOrder; use jolt_core::poly::split_eq_poly::GruenSplitEqPolynomial; -use super::{Invariant, InvariantViolation, SynthesisTarget}; +use super::{Invariant, InvariantViolation}; type Challenge = ::Challenge; @@ -44,7 +43,7 @@ fn to_challenges(vals: &[u128]) -> Vec { // ── LowToHigh ──────────────────────────────────────────────────────── -#[jolt_eval_macros::invariant] +#[jolt_eval_macros::invariant(Test, Fuzz)] #[derive(Default)] pub struct SplitEqBindLowHighInvariant; @@ -62,10 +61,6 @@ impl Invariant for SplitEqBindLowHighInvariant { .to_string() } - fn targets(&self) -> EnumSet { - SynthesisTarget::Test | SynthesisTarget::Fuzz - } - fn setup(&self) {} fn check(&self, _setup: &(), input: SplitEqBindInput) -> Result<(), InvariantViolation> { @@ -123,7 +118,7 @@ impl Invariant for SplitEqBindLowHighInvariant { // ── HighToLow ──────────────────────────────────────────────────────── -#[jolt_eval_macros::invariant] +#[jolt_eval_macros::invariant(Test, Fuzz)] #[derive(Default)] pub struct SplitEqBindHighLowInvariant; @@ -141,10 +136,6 @@ impl Invariant for SplitEqBindHighLowInvariant { .to_string() } - fn targets(&self) -> EnumSet { - SynthesisTarget::Test | SynthesisTarget::Fuzz - } - fn setup(&self) {} fn check(&self, _setup: &(), input: SplitEqBindInput) -> Result<(), InvariantViolation> { diff --git a/jolt-eval/src/invariant/synthesis/fuzz.rs b/jolt-eval/src/invariant/synthesis/fuzz.rs index 662559b04..350ceb437 100644 --- a/jolt-eval/src/invariant/synthesis/fuzz.rs +++ 
b/jolt-eval/src/invariant/synthesis/fuzz.rs @@ -14,6 +14,16 @@ macro_rules! fuzz_invariant { ($inv:expr) => { use $crate::Invariant as _; + use $crate::InvariantTargets as _; + + // Assert at init time that this invariant includes the Fuzz target. + fn __assert_fuzz_target(inv: &I) { + assert!( + inv.targets() + .contains($crate::SynthesisTarget::Fuzz), + "Invariant does not include SynthesisTarget::Fuzz" + ); + } static __FUZZ_SETUP: ::std::sync::OnceLock< ::std::boxed::Box, @@ -40,6 +50,7 @@ macro_rules! fuzz_invariant { ::libfuzzer_sys::fuzz_target!( init: { + __assert_fuzz_target(&$inv); __fuzz_init(&$inv); }, |data: &[u8]| { diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index 7140ba788..347b81a4a 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -17,7 +17,9 @@ use jolt_core::curve::Bn254Curve; use jolt_core::poly::commitment::dory::DoryCommitmentScheme; use jolt_core::transcripts::Blake2bTranscript; -pub use invariant::{Invariant, InvariantViolation, JoltInvariants, SynthesisTarget}; +pub use invariant::{ + Invariant, InvariantTargets, InvariantViolation, JoltInvariants, SynthesisTarget, +}; pub use objective::{AbstractObjective, Direction, MeasurementError, Objective}; // Re-exports used by the #[invariant] proc macro generated code. 
diff --git a/jolt-eval/tests/agent_test.rs b/jolt-eval/tests/agent_test.rs index fc6a641e7..c0e2c4e00 100644 --- a/jolt-eval/tests/agent_test.rs +++ b/jolt-eval/tests/agent_test.rs @@ -4,7 +4,7 @@ use std::path::Path; use enumset::EnumSet; use jolt_eval::agent::{AgentError, AgentHarness, AgentResponse, MockAgent}; use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; -use jolt_eval::invariant::{Invariant, InvariantViolation, SynthesisTarget}; +use jolt_eval::invariant::{Invariant, InvariantTargets, InvariantViolation, SynthesisTarget}; use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; use jolt_eval::objective::Direction; @@ -14,6 +14,11 @@ use jolt_eval::objective::Direction; /// Always passes -- the red-team loop should never find a violation. struct AlwaysPassInvariant; +impl InvariantTargets for AlwaysPassInvariant { + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::RedTeam + } +} impl Invariant for AlwaysPassInvariant { type Setup = (); type Input = u8; @@ -23,9 +28,6 @@ impl Invariant for AlwaysPassInvariant { fn description(&self) -> String { "This invariant always passes.".into() } - fn targets(&self) -> EnumSet { - SynthesisTarget::Test | SynthesisTarget::RedTeam - } fn setup(&self) {} fn check(&self, _: &(), _: u8) -> Result<(), InvariantViolation> { Ok(()) @@ -37,6 +39,11 @@ impl Invariant for AlwaysPassInvariant { /// Always fails -- the red-team loop should find a violation immediately. 
struct AlwaysFailInvariant; +impl InvariantTargets for AlwaysFailInvariant { + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::RedTeam + } +} impl Invariant for AlwaysFailInvariant { type Setup = (); type Input = u8; @@ -46,9 +53,6 @@ impl Invariant for AlwaysFailInvariant { fn description(&self) -> String { "This invariant always fails.".into() } - fn targets(&self) -> EnumSet { - SynthesisTarget::Test | SynthesisTarget::RedTeam - } fn setup(&self) {} fn check(&self, _: &(), input: u8) -> Result<(), InvariantViolation> { Err(InvariantViolation::new(format!("always fails ({input})"))) @@ -60,6 +64,11 @@ impl Invariant for AlwaysFailInvariant { /// Fails only when the input is 0 -- tests that fuzz inputs can trigger it. struct FailsOnZeroInvariant; +impl InvariantTargets for FailsOnZeroInvariant { + fn targets(&self) -> EnumSet { + SynthesisTarget::Test | SynthesisTarget::RedTeam + } +} impl Invariant for FailsOnZeroInvariant { type Setup = (); type Input = u8; @@ -69,9 +78,6 @@ impl Invariant for FailsOnZeroInvariant { fn description(&self) -> String { "Fails when input is 0.".into() } - fn targets(&self) -> EnumSet { - SynthesisTarget::Test | SynthesisTarget::RedTeam - } fn setup(&self) {} fn check(&self, _: &(), input: u8) -> Result<(), InvariantViolation> { if input == 0 { diff --git a/jolt-eval/tests/integration.rs b/jolt-eval/tests/integration.rs index 8a56a9ce5..e63f643b3 100644 --- a/jolt-eval/tests/integration.rs +++ b/jolt-eval/tests/integration.rs @@ -1,10 +1,9 @@ -use jolt_eval::invariant::{ - Invariant, InvariantReport, InvariantViolation, JoltInvariants, SynthesisTarget, -}; +use jolt_eval::invariant::{Invariant, InvariantTargets, InvariantViolation, JoltInvariants}; use jolt_eval::objective::{AbstractObjective, Direction, MeasurementError}; /// A trivial invariant for testing the framework itself. 
struct TrivialInvariant; +impl InvariantTargets for TrivialInvariant {} impl Invariant for TrivialInvariant { type Setup = (); @@ -18,10 +17,6 @@ impl Invariant for TrivialInvariant { "Always passes".to_string() } - fn targets(&self) -> enumset::EnumSet { - SynthesisTarget::Test.into() - } - fn setup(&self) -> Self::Setup {} fn check(&self, _setup: &Self::Setup, _input: u8) -> Result<(), InvariantViolation> { @@ -35,6 +30,7 @@ impl Invariant for TrivialInvariant { /// An invariant that always fails, for testing violation reporting. struct FailingInvariant; +impl InvariantTargets for FailingInvariant {} impl Invariant for FailingInvariant { type Setup = (); @@ -48,10 +44,6 @@ impl Invariant for FailingInvariant { "Always fails".to_string() } - fn targets(&self) -> enumset::EnumSet { - SynthesisTarget::Test.into() - } - fn setup(&self) -> Self::Setup {} fn check(&self, _setup: &Self::Setup, input: u8) -> Result<(), InvariantViolation> { @@ -100,17 +92,6 @@ fn test_failing_invariant_reports_violations() { } } -#[test] -fn test_invariant_report() { - let results: Vec> = - vec![Ok(()), Ok(()), Err(InvariantViolation::new("bad"))]; - let report = InvariantReport::from_results("test", &results); - assert_eq!(report.total, 3); - assert_eq!(report.passed, 2); - assert_eq!(report.failed, 1); - assert_eq!(report.violations.len(), 1); -} - #[test] fn test_jolt_invariants_all() { let all = JoltInvariants::all(); diff --git a/jolt-eval/tests/macro_test.rs b/jolt-eval/tests/macro_test.rs index 91e7dca8b..557be0952 100644 --- a/jolt-eval/tests/macro_test.rs +++ b/jolt-eval/tests/macro_test.rs @@ -1,11 +1,10 @@ -use enumset::EnumSet; -use jolt_eval::invariant::{Invariant, InvariantViolation, SynthesisTarget}; +use jolt_eval::invariant::{Invariant, InvariantViolation}; // --------------------------------------------------------------------------- // AlwaysPass: trivial invariant to test macro synthesis // --------------------------------------------------------------------------- 
-#[jolt_eval_macros::invariant] +#[jolt_eval_macros::invariant(Test, Fuzz, RedTeam)] #[derive(Default)] pub struct AlwaysPassInvariant; @@ -19,9 +18,6 @@ impl Invariant for AlwaysPassInvariant { fn description(&self) -> String { "Trivial invariant that always passes — used to test macro synthesis.".to_string() } - fn targets(&self) -> EnumSet { - SynthesisTarget::Test | SynthesisTarget::Fuzz | SynthesisTarget::RedTeam - } fn setup(&self) -> Self::Setup {} fn check(&self, _: &(), _input: u8) -> Result<(), InvariantViolation> { Ok(()) @@ -48,7 +44,7 @@ pub struct RangeInput { pub hi: u32, } -#[jolt_eval_macros::invariant] +#[jolt_eval_macros::invariant(Test, Fuzz)] #[derive(Default)] pub struct BoundsCheckInvariant; @@ -62,9 +58,6 @@ impl Invariant for BoundsCheckInvariant { fn description(&self) -> String { "Checks that max(lo,hi) >= min(lo,hi).".to_string() } - fn targets(&self) -> EnumSet { - SynthesisTarget::Test | SynthesisTarget::Fuzz - } fn setup(&self) -> Self::Setup {} fn check(&self, _: &(), input: RangeInput) -> Result<(), InvariantViolation> { let lo = input.lo.min(input.hi); From 291f98a30018bbaf92f19195aad3d6ff9c0f06db Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 16:11:30 -0400 Subject: [PATCH 25/86] refactor(jolt-eval): split agent.rs into agent/ directory - agent/mod.rs: AgentHarness trait, types, apply_diff, truncate - agent/claude.rs: ClaudeCodeAgent, worktree helpers - agent/mock.rs: MockAgent for testing Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/agent.rs | 371 ----------------------- jolt-eval/src/agent/claude.rs | 188 ++++++++++++ jolt-eval/src/agent/mock.rs | 86 ++++++ jolt-eval/src/agent/mod.rs | 101 ++++++ jolt-eval/src/invariant/split_eq_bind.rs | 2 +- 5 files changed, 376 insertions(+), 372 deletions(-) delete mode 100644 jolt-eval/src/agent.rs create mode 100644 jolt-eval/src/agent/claude.rs create mode 100644 jolt-eval/src/agent/mock.rs create mode 100644 jolt-eval/src/agent/mod.rs diff --git 
a/jolt-eval/src/agent.rs b/jolt-eval/src/agent.rs deleted file mode 100644 index 6e1270522..000000000 --- a/jolt-eval/src/agent.rs +++ /dev/null @@ -1,371 +0,0 @@ -use std::fmt; -use std::path::{Path, PathBuf}; -use std::process::Command; - -/// Output from an agent invocation. -#[derive(Debug)] -pub struct AgentResponse { - /// The agent's textual output/analysis. - pub text: String, - /// A unified diff of code changes the agent produced, if any. - pub diff: Option, -} - -/// Error during agent invocation. -#[derive(Debug, Clone)] -pub struct AgentError { - pub message: String, -} - -impl fmt::Display for AgentError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.message) - } -} - -impl std::error::Error for AgentError {} - -impl AgentError { - pub fn new(message: impl Into) -> Self { - Self { - message: message.into(), - } - } -} - -/// A coding agent that can analyze or modify a repository given a prompt. -/// -/// Implementors are responsible for their own isolation strategy (worktrees, -/// containers, API calls, etc.). The `repo_dir` parameter indicates the -/// repository root so the agent can set up whatever sandbox it needs. -/// -/// # Examples -/// -/// The built-in [`ClaudeCodeAgent`] creates a git worktree and invokes the -/// `claude` CLI. A multi-agent harness could fan out to several agents in -/// parallel and merge results. An API-based agent could call a remote -/// service without any local isolation. -pub trait AgentHarness: Send + Sync { - fn invoke(&self, repo_dir: &Path, prompt: &str) -> Result; - - /// Invoke the agent with a JSON Schema constraint on the response. - /// - /// Agents that support structured output (e.g. Claude Code with - /// `--output-format json --json-schema`) should override this to - /// guarantee the response conforms to `schema`. The returned - /// [`AgentResponse::text`] must be the validated JSON string. - /// - /// The default falls back to [`invoke`](Self::invoke). 
- fn invoke_structured( - &self, - repo_dir: &Path, - prompt: &str, - _schema: &serde_json::Value, - ) -> Result { - self.invoke(repo_dir, prompt) - } -} - -/// Agent implementation that invokes the Claude Code CLI in an isolated -/// git worktree. -pub struct ClaudeCodeAgent { - pub model: String, - pub max_turns: usize, -} - -impl ClaudeCodeAgent { - pub fn new(model: impl Into, max_turns: usize) -> Self { - Self { - model: model.into(), - max_turns, - } - } -} - -impl ClaudeCodeAgent { - fn run_cli( - &self, - worktree_dir: &Path, - prompt: &str, - extra_args: &[&str], - ) -> Result { - tracing::info!( - "Invoking claude (model={}, max_turns={})...", - self.model, - self.max_turns - ); - let mut cmd = Command::new("claude"); - cmd.current_dir(worktree_dir) - .arg("-p") - .arg(prompt) - .arg("--model") - .arg(&self.model) - .arg("--max-turns") - .arg(self.max_turns.to_string()) - .arg("--verbose"); - for arg in extra_args { - cmd.arg(arg); - } - cmd.output().map_err(|e| { - AgentError::new(format!( - "Failed to invoke claude: {e}. \ - Make sure the `claude` CLI is installed and on your PATH. 
\ - Install via: npm install -g @anthropic-ai/claude-code" - )) - }) - } -} - -impl AgentHarness for ClaudeCodeAgent { - fn invoke(&self, repo_dir: &Path, prompt: &str) -> Result { - let worktree_dir = create_worktree(repo_dir)?; - tracing::info!("Created worktree at {}", worktree_dir.display()); - - let result = self.run_cli(&worktree_dir, prompt, &[]); - - // Capture diff before cleanup - let diff = Command::new("git") - .current_dir(&worktree_dir) - .args(["diff", "HEAD"]) - .output() - .ok() - .and_then(|o| { - let s = String::from_utf8_lossy(&o.stdout).to_string(); - if s.trim().is_empty() { - None - } else { - Some(s) - } - }); - - tracing::info!("Cleaning up worktree..."); - remove_worktree(repo_dir, &worktree_dir); - let _ = std::fs::remove_dir_all(&worktree_dir); - - let output = result?; - let stdout = String::from_utf8_lossy(&output.stdout); - let stderr = String::from_utf8_lossy(&output.stderr); - - if !output.status.success() { - tracing::warn!("claude exited with status {}", output.status); - if !stderr.is_empty() { - tracing::warn!("stderr: {}", truncate(&stderr, 500)); - } - } - - let text = if stdout.trim().is_empty() { - stderr.to_string() - } else { - stdout.to_string() - }; - - if text.trim().is_empty() && diff.is_none() { - return Err(AgentError::new("Agent produced no output")); - } - - Ok(AgentResponse { text, diff }) - } - - fn invoke_structured( - &self, - repo_dir: &Path, - prompt: &str, - schema: &serde_json::Value, - ) -> Result { - let worktree_dir = create_worktree(repo_dir)?; - tracing::info!("Created worktree at {}", worktree_dir.display()); - - let schema_str = serde_json::to_string(schema) - .map_err(|e| AgentError::new(format!("schema serialization: {e}")))?; - - let result = self.run_cli( - &worktree_dir, - prompt, - &["--output-format", "json", "--json-schema", &schema_str], - ); - - tracing::info!("Cleaning up worktree..."); - remove_worktree(repo_dir, &worktree_dir); - let _ = std::fs::remove_dir_all(&worktree_dir); - - let 
output = result?; - let stdout = String::from_utf8_lossy(&output.stdout); - - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - return Err(AgentError::new(format!( - "claude exited with status {}: {}", - output.status, - truncate(&stderr, 500) - ))); - } - - // Parse the CLI JSON envelope and extract structured_output - let envelope: serde_json::Value = serde_json::from_str(&stdout) - .map_err(|e| AgentError::new(format!("failed to parse CLI JSON envelope: {e}")))?; - - let text = if let Some(structured) = envelope.get("structured_output") { - serde_json::to_string(structured) - .map_err(|e| AgentError::new(format!("re-serialize structured_output: {e}")))? - } else if let Some(result) = envelope.get("result") { - match result { - serde_json::Value::String(s) => s.clone(), - other => serde_json::to_string(other) - .map_err(|e| AgentError::new(format!("re-serialize result: {e}")))?, - } - } else { - return Err(AgentError::new( - "CLI JSON envelope contained neither structured_output nor result", - )); - }; - - Ok(AgentResponse { text, diff: None }) - } -} - -/// Create an isolated detached git worktree from `repo_dir`. -pub fn create_worktree(repo_dir: &Path) -> Result { - let tmp = tempfile::tempdir().map_err(|e| AgentError::new(format!("tempdir: {e}")))?; - let worktree_dir = tmp.path().to_path_buf(); - std::mem::forget(tmp); - - let status = Command::new("git") - .current_dir(repo_dir) - .args(["worktree", "add", "--detach"]) - .arg(&worktree_dir) - .status() - .map_err(|e| AgentError::new(format!("git worktree: {e}")))?; - - if !status.success() { - return Err(AgentError::new("git worktree add failed")); - } - - Ok(worktree_dir) -} - -/// Remove a git worktree. -pub fn remove_worktree(repo_dir: &Path, worktree_dir: &Path) { - let _ = Command::new("git") - .current_dir(repo_dir) - .args(["worktree", "remove", "--force"]) - .arg(worktree_dir) - .status(); -} - -/// Apply a unified diff to `repo_dir`. 
-pub fn apply_diff(repo_dir: &Path, diff: &str) -> Result<(), AgentError> { - let mut child = Command::new("git") - .current_dir(repo_dir) - .args(["apply", "--allow-empty"]) - .stdin(std::process::Stdio::piped()) - .spawn() - .map_err(|e| AgentError::new(format!("git apply spawn: {e}")))?; - - if let Some(stdin) = child.stdin.as_mut() { - use std::io::Write; - let _ = stdin.write_all(diff.as_bytes()); - } - - let status = child - .wait() - .map_err(|e| AgentError::new(format!("git apply wait: {e}")))?; - - if !status.success() { - return Err(AgentError::new("git apply failed")); - } - Ok(()) -} - -/// A mock agent for testing. Returns pre-configured responses and records -/// every prompt it receives. -/// -/// # Usage -/// -/// ```ignore -/// use jolt_eval::agent::{MockAgent, AgentResponse}; -/// -/// // Agent that always succeeds with a fixed response -/// let agent = MockAgent::always_ok("I found nothing."); -/// -/// // Agent that returns a sequence of responses -/// let agent = MockAgent::from_responses(vec![ -/// Ok(AgentResponse { text: "attempt 1".into(), diff: None }), -/// Err(AgentError::new("network timeout")), -/// Ok(AgentResponse { text: "attempt 3".into(), diff: Some("diff".into()) }), -/// ]); -/// -/// // After invoking, inspect the prompts the agent received -/// let prompts = agent.recorded_prompts(); -/// ``` -pub struct MockAgent { - responses: std::sync::Mutex>>, - prompts: std::sync::Mutex>, -} - -impl MockAgent { - /// Create a mock that always returns `Ok` with the given text and no diff. - pub fn always_ok(text: &str) -> Self { - let text = text.to_string(); - Self { - responses: std::sync::Mutex::new(vec![Ok(AgentResponse { text, diff: None })]), - prompts: std::sync::Mutex::new(Vec::new()), - } - } - - /// Create a mock that always returns `Err`. 
- pub fn always_err(message: &str) -> Self { - Self { - responses: std::sync::Mutex::new(vec![Err(AgentError::new(message))]), - prompts: std::sync::Mutex::new(Vec::new()), - } - } - - /// Create a mock that returns responses from a queue. - /// After the queue is exhausted, subsequent calls return an error. - pub fn from_responses(responses: Vec>) -> Self { - let mut reversed = responses; - reversed.reverse(); // so we can pop from the back - Self { - responses: std::sync::Mutex::new(reversed), - prompts: std::sync::Mutex::new(Vec::new()), - } - } - - /// Return all prompts that were passed to `invoke`, in order. - pub fn recorded_prompts(&self) -> Vec { - self.prompts.lock().unwrap().clone() - } -} - -impl AgentHarness for MockAgent { - fn invoke(&self, _repo_dir: &Path, prompt: &str) -> Result { - self.prompts.lock().unwrap().push(prompt.to_string()); - - let mut responses = self.responses.lock().unwrap(); - if responses.is_empty() { - return Err(AgentError::new("MockAgent: no more responses")); - } - // If only one response left, clone it (repeating) instead of popping - if responses.len() == 1 { - return match &responses[0] { - Ok(r) => Ok(AgentResponse { - text: r.text.clone(), - diff: r.diff.clone(), - }), - Err(e) => Err(AgentError::new(&e.message)), - }; - } - responses.pop().unwrap() - } -} - -pub fn truncate(s: &str, max_len: usize) -> &str { - if s.len() <= max_len { - return s; - } - let mut end = max_len; - while end > 0 && !s.is_char_boundary(end) { - end -= 1; - } - &s[..end] -} diff --git a/jolt-eval/src/agent/claude.rs b/jolt-eval/src/agent/claude.rs new file mode 100644 index 000000000..458de9a7d --- /dev/null +++ b/jolt-eval/src/agent/claude.rs @@ -0,0 +1,188 @@ +use std::path::{Path, PathBuf}; +use std::process::Command; + +use super::{AgentError, AgentHarness, AgentResponse}; + +/// Agent implementation that invokes the Claude Code CLI in an isolated +/// git worktree. 
+pub struct ClaudeCodeAgent { + pub model: String, + pub max_turns: usize, +} + +impl ClaudeCodeAgent { + pub fn new(model: impl Into, max_turns: usize) -> Self { + Self { + model: model.into(), + max_turns, + } + } + + fn run_cli( + &self, + worktree_dir: &Path, + prompt: &str, + extra_args: &[&str], + ) -> Result { + tracing::info!( + "Invoking claude (model={}, max_turns={})...", + self.model, + self.max_turns + ); + let mut cmd = Command::new("claude"); + cmd.current_dir(worktree_dir) + .arg("-p") + .arg(prompt) + .arg("--model") + .arg(&self.model) + .arg("--max-turns") + .arg(self.max_turns.to_string()) + .arg("--verbose"); + for arg in extra_args { + cmd.arg(arg); + } + cmd.output().map_err(|e| { + AgentError::new(format!( + "Failed to invoke claude: {e}. \ + Make sure the `claude` CLI is installed and on your PATH. \ + Install via: npm install -g @anthropic-ai/claude-code" + )) + }) + } +} + +impl AgentHarness for ClaudeCodeAgent { + fn invoke(&self, repo_dir: &Path, prompt: &str) -> Result { + let worktree_dir = create_worktree(repo_dir)?; + tracing::info!("Created worktree at {}", worktree_dir.display()); + + let result = self.run_cli(&worktree_dir, prompt, &[]); + + // Capture diff before cleanup + let diff = Command::new("git") + .current_dir(&worktree_dir) + .args(["diff", "HEAD"]) + .output() + .ok() + .and_then(|o| { + let s = String::from_utf8_lossy(&o.stdout).to_string(); + if s.trim().is_empty() { + None + } else { + Some(s) + } + }); + + tracing::info!("Cleaning up worktree..."); + remove_worktree(repo_dir, &worktree_dir); + let _ = std::fs::remove_dir_all(&worktree_dir); + + let output = result?; + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + + if !output.status.success() { + tracing::warn!("claude exited with status {}", output.status); + if !stderr.is_empty() { + tracing::warn!("stderr: {}", super::truncate(&stderr, 500)); + } + } + + let text = if stdout.trim().is_empty() { + 
stderr.to_string() + } else { + stdout.to_string() + }; + + if text.trim().is_empty() && diff.is_none() { + return Err(AgentError::new("Agent produced no output")); + } + + Ok(AgentResponse { text, diff }) + } + + fn invoke_structured( + &self, + repo_dir: &Path, + prompt: &str, + schema: &serde_json::Value, + ) -> Result { + let worktree_dir = create_worktree(repo_dir)?; + tracing::info!("Created worktree at {}", worktree_dir.display()); + + let schema_str = serde_json::to_string(schema) + .map_err(|e| AgentError::new(format!("schema serialization: {e}")))?; + + let result = self.run_cli( + &worktree_dir, + prompt, + &["--output-format", "json", "--json-schema", &schema_str], + ); + + tracing::info!("Cleaning up worktree..."); + remove_worktree(repo_dir, &worktree_dir); + let _ = std::fs::remove_dir_all(&worktree_dir); + + let output = result?; + let stdout = String::from_utf8_lossy(&output.stdout); + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(AgentError::new(format!( + "claude exited with status {}: {}", + output.status, + super::truncate(&stderr, 500) + ))); + } + + // Parse the CLI JSON envelope and extract structured_output + let envelope: serde_json::Value = serde_json::from_str(&stdout) + .map_err(|e| AgentError::new(format!("failed to parse CLI JSON envelope: {e}")))?; + + let text = if let Some(structured) = envelope.get("structured_output") { + serde_json::to_string(structured) + .map_err(|e| AgentError::new(format!("re-serialize structured_output: {e}")))? 
+ } else if let Some(result) = envelope.get("result") { + match result { + serde_json::Value::String(s) => s.clone(), + other => serde_json::to_string(other) + .map_err(|e| AgentError::new(format!("re-serialize result: {e}")))?, + } + } else { + return Err(AgentError::new( + "CLI JSON envelope contained neither structured_output nor result", + )); + }; + + Ok(AgentResponse { text, diff: None }) + } +} + +/// Create an isolated detached git worktree from `repo_dir`. +pub fn create_worktree(repo_dir: &Path) -> Result { + let tmp = tempfile::tempdir().map_err(|e| AgentError::new(format!("tempdir: {e}")))?; + let worktree_dir = tmp.path().to_path_buf(); + std::mem::forget(tmp); + + let status = Command::new("git") + .current_dir(repo_dir) + .args(["worktree", "add", "--detach"]) + .arg(&worktree_dir) + .status() + .map_err(|e| AgentError::new(format!("git worktree: {e}")))?; + + if !status.success() { + return Err(AgentError::new("git worktree add failed")); + } + + Ok(worktree_dir) +} + +/// Remove a git worktree. +pub fn remove_worktree(repo_dir: &Path, worktree_dir: &Path) { + let _ = Command::new("git") + .current_dir(repo_dir) + .args(["worktree", "remove", "--force"]) + .arg(worktree_dir) + .status(); +} diff --git a/jolt-eval/src/agent/mock.rs b/jolt-eval/src/agent/mock.rs new file mode 100644 index 000000000..a814dcbfe --- /dev/null +++ b/jolt-eval/src/agent/mock.rs @@ -0,0 +1,86 @@ +use std::path::Path; + +use super::{AgentError, AgentHarness, AgentResponse}; + +/// A mock agent for testing. Returns pre-configured responses and records +/// every prompt it receives. 
+/// +/// # Usage +/// +/// ```ignore +/// use jolt_eval::agent::{MockAgent, AgentResponse}; +/// +/// // Agent that always succeeds with a fixed response +/// let agent = MockAgent::always_ok("I found nothing."); +/// +/// // Agent that returns a sequence of responses +/// let agent = MockAgent::from_responses(vec![ +/// Ok(AgentResponse { text: "attempt 1".into(), diff: None }), +/// Err(AgentError::new("network timeout")), +/// Ok(AgentResponse { text: "attempt 3".into(), diff: Some("diff".into()) }), +/// ]); +/// +/// // After invoking, inspect the prompts the agent received +/// let prompts = agent.recorded_prompts(); +/// ``` +pub struct MockAgent { + responses: std::sync::Mutex>>, + prompts: std::sync::Mutex>, +} + +impl MockAgent { + /// Create a mock that always returns `Ok` with the given text and no diff. + pub fn always_ok(text: &str) -> Self { + let text = text.to_string(); + Self { + responses: std::sync::Mutex::new(vec![Ok(AgentResponse { text, diff: None })]), + prompts: std::sync::Mutex::new(Vec::new()), + } + } + + /// Create a mock that always returns `Err`. + pub fn always_err(message: &str) -> Self { + Self { + responses: std::sync::Mutex::new(vec![Err(AgentError::new(message))]), + prompts: std::sync::Mutex::new(Vec::new()), + } + } + + /// Create a mock that returns responses from a queue. + /// The final response is repeated on later calls; only an empty queue yields an error. + pub fn from_responses(responses: Vec>) -> Self { + let mut reversed = responses; + reversed.reverse(); // so we can pop from the back + Self { + responses: std::sync::Mutex::new(reversed), + prompts: std::sync::Mutex::new(Vec::new()), + } + } + + /// Return all prompts that were passed to `invoke`, in order.
+ pub fn recorded_prompts(&self) -> Vec { + self.prompts.lock().unwrap().clone() + } +} + +impl AgentHarness for MockAgent { + fn invoke(&self, _repo_dir: &Path, prompt: &str) -> Result { + self.prompts.lock().unwrap().push(prompt.to_string()); + + let mut responses = self.responses.lock().unwrap(); + if responses.is_empty() { + return Err(AgentError::new("MockAgent: no more responses")); + } + // If only one response left, clone it (repeating) instead of popping + if responses.len() == 1 { + return match &responses[0] { + Ok(r) => Ok(AgentResponse { + text: r.text.clone(), + diff: r.diff.clone(), + }), + Err(e) => Err(AgentError::new(&e.message)), + }; + } + responses.pop().unwrap() + } +} diff --git a/jolt-eval/src/agent/mod.rs b/jolt-eval/src/agent/mod.rs new file mode 100644 index 000000000..ecb9ec663 --- /dev/null +++ b/jolt-eval/src/agent/mod.rs @@ -0,0 +1,101 @@ +pub mod claude; +pub mod mock; + +use std::fmt; +use std::path::Path; + +pub use claude::ClaudeCodeAgent; +pub use mock::MockAgent; + +/// Output from an agent invocation. +#[derive(Debug)] +pub struct AgentResponse { + /// The agent's textual output/analysis. + pub text: String, + /// A unified diff of code changes the agent produced, if any. + pub diff: Option, +} + +/// Error during agent invocation. +#[derive(Debug, Clone)] +pub struct AgentError { + pub message: String, +} + +impl fmt::Display for AgentError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.message) + } +} + +impl std::error::Error for AgentError {} + +impl AgentError { + pub fn new(message: impl Into) -> Self { + Self { + message: message.into(), + } + } +} + +/// A coding agent that can analyze or modify a repository given a prompt. +/// +/// Implementors are responsible for their own isolation strategy (worktrees, +/// containers, API calls, etc.). The `repo_dir` parameter indicates the +/// repository root so the agent can set up whatever sandbox it needs. 
+pub trait AgentHarness: Send + Sync { + fn invoke(&self, repo_dir: &Path, prompt: &str) -> Result; + + /// Invoke the agent with a JSON Schema constraint on the response. + /// + /// Agents that support structured output (e.g. Claude Code with + /// `--output-format json --json-schema`) should override this to + /// guarantee the response conforms to `schema`. + /// + /// The default falls back to [`invoke`](Self::invoke). + fn invoke_structured( + &self, + repo_dir: &Path, + prompt: &str, + _schema: &serde_json::Value, + ) -> Result { + self.invoke(repo_dir, prompt) + } +} + +/// Apply a unified diff to `repo_dir`. +pub fn apply_diff(repo_dir: &Path, diff: &str) -> Result<(), AgentError> { + use std::process::Command; + + let mut child = Command::new("git") + .current_dir(repo_dir) + .args(["apply", "--allow-empty"]) + .stdin(std::process::Stdio::piped()) + .spawn() + .map_err(|e| AgentError::new(format!("git apply spawn: {e}")))?; + + if let Some(stdin) = child.stdin.as_mut() { + use std::io::Write; + let _ = stdin.write_all(diff.as_bytes()); + } + + let status = child + .wait() + .map_err(|e| AgentError::new(format!("git apply wait: {e}")))?; + + if !status.success() { + return Err(AgentError::new("git apply failed")); + } + Ok(()) +} + +pub fn truncate(s: &str, max_len: usize) -> &str { + if s.len() <= max_len { + return s; + } + let mut end = max_len; + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + &s[..end] +} diff --git a/jolt-eval/src/invariant/split_eq_bind.rs b/jolt-eval/src/invariant/split_eq_bind.rs index a33d417b9..830b37b3f 100644 --- a/jolt-eval/src/invariant/split_eq_bind.rs +++ b/jolt-eval/src/invariant/split_eq_bind.rs @@ -43,7 +43,7 @@ fn to_challenges(vals: &[u128]) -> Vec { // ── LowToHigh ──────────────────────────────────────────────────────── -#[jolt_eval_macros::invariant(Test, Fuzz)] +#[jolt_eval_macros::invariant(Test, Fuzz, RedTeam)] #[derive(Default)] pub struct SplitEqBindLowHighInvariant; From 
d89e092c4f02e196ebf12c7101f25f9e4bb987c2 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 16:21:15 -0400 Subject: [PATCH 26/86] fix(jolt-eval): fix Claude CLI invocation for structured output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't pass --verbose with --output-format json — it turns stdout into a JSONL event stream that can't be parsed as a single result object. Also parse the JSON envelope even on non-zero exit codes, since Claude may still produce structured_output on soft errors like max_turns. Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/agent/claude.rs | 53 +++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/jolt-eval/src/agent/claude.rs b/jolt-eval/src/agent/claude.rs index 458de9a7d..6bb032431 100644 --- a/jolt-eval/src/agent/claude.rs +++ b/jolt-eval/src/agent/claude.rs @@ -23,6 +23,7 @@ impl ClaudeCodeAgent { worktree_dir: &Path, prompt: &str, extra_args: &[&str], + verbose: bool, ) -> Result { tracing::info!( "Invoking claude (model={}, max_turns={})...", @@ -36,8 +37,10 @@ impl ClaudeCodeAgent { .arg("--model") .arg(&self.model) .arg("--max-turns") - .arg(self.max_turns.to_string()) - .arg("--verbose"); + .arg(self.max_turns.to_string()); + if verbose { + cmd.arg("--verbose"); + } for arg in extra_args { cmd.arg(arg); } @@ -56,7 +59,7 @@ impl AgentHarness for ClaudeCodeAgent { let worktree_dir = create_worktree(repo_dir)?; tracing::info!("Created worktree at {}", worktree_dir.display()); - let result = self.run_cli(&worktree_dir, prompt, &[]); + let result = self.run_cli(&worktree_dir, prompt, &[], true); // Capture diff before cleanup let diff = Command::new("git") @@ -117,6 +120,7 @@ impl AgentHarness for ClaudeCodeAgent { &worktree_dir, prompt, &["--output-format", "json", "--json-schema", &schema_str], + false, ); tracing::info!("Cleaning up worktree..."); @@ -126,18 +130,28 @@ impl AgentHarness for ClaudeCodeAgent 
{ let output = result?; let stdout = String::from_utf8_lossy(&output.stdout); - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - return Err(AgentError::new(format!( - "claude exited with status {}: {}", - output.status, - super::truncate(&stderr, 500) - ))); - } - - // Parse the CLI JSON envelope and extract structured_output - let envelope: serde_json::Value = serde_json::from_str(&stdout) - .map_err(|e| AgentError::new(format!("failed to parse CLI JSON envelope: {e}")))?; + // Parse the JSON envelope — even on non-zero exit (e.g. max_turns + // reached), Claude may still have produced structured output. + let envelope: serde_json::Value = match serde_json::from_str(&stdout) { + Ok(v) => v, + Err(e) => { + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + let detail = if stderr.trim().is_empty() { + super::truncate(&stdout, 1000) + } else { + super::truncate(&stderr, 1000) + }; + return Err(AgentError::new(format!( + "claude exited with status {}: {}", + output.status, detail + ))); + } + return Err(AgentError::new(format!( + "failed to parse CLI JSON envelope: {e}" + ))); + } + }; let text = if let Some(structured) = envelope.get("structured_output") { serde_json::to_string(structured) @@ -148,6 +162,15 @@ impl AgentHarness for ClaudeCodeAgent { other => serde_json::to_string(other) .map_err(|e| AgentError::new(format!("re-serialize result: {e}")))?, } + } else if !output.status.success() { + let errors = envelope + .get("errors") + .and_then(|e| serde_json::to_string(e).ok()) + .unwrap_or_default(); + return Err(AgentError::new(format!( + "claude exited with status {}: {}", + output.status, errors + ))); } else { return Err(AgentError::new( "CLI JSON envelope contained neither structured_output nor result", From 43c1ea4eae4168a3ed69e35c2dc43b360766f2af Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 17:02:15 -0400 Subject: [PATCH 27/86] feat(jolt-eval): add --hint flag 
to redteam CLI Also fix envelope parsing fallback: when extract_json finds the full envelope JSON from markdown, try parsing it as an envelope first to correctly extract the counterexample field. Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/redteam.rs | 5 ++++ jolt-eval/src/invariant/synthesis/redteam.rs | 25 ++++++++++++++--- jolt-eval/tests/agent_test.rs | 28 ++++++++++---------- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index 583d99ea6..4afbbb638 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -25,6 +25,10 @@ struct Cli { #[arg(long, default_value = "30")] max_turns: usize, + /// Extra context or guidance for the red-team agent + #[arg(long)] + hint: Option, + /// List available red-teamable invariants and exit #[arg(long)] list: bool, @@ -61,6 +65,7 @@ fn main() -> eyre::Result<()> { let config = RedTeamConfig { num_iterations: cli.iterations, + hint: cli.hint, }; let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); let repo_dir = std::env::current_dir()?; diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index d12c0e477..1f9027ad4 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -18,11 +18,15 @@ pub enum RedTeamResult { /// Configuration for an AI red-team session. 
pub struct RedTeamConfig { pub num_iterations: usize, + pub hint: Option, } impl Default for RedTeamConfig { fn default() -> Self { - Self { num_iterations: 10 } + Self { + num_iterations: 10, + hint: None, + } } } @@ -52,7 +56,12 @@ pub fn auto_redteam( invariant.name() ); - let prompt = build_redteam_prompt(&description, input_example.as_deref(), &failed_attempts); + let prompt = build_redteam_prompt( + &description, + input_example.as_deref(), + config.hint.as_deref(), + &failed_attempts, + ); let response = match agent.invoke_structured(repo_dir, &prompt, &envelope_schema) { Ok(r) => r, @@ -70,7 +79,10 @@ pub fn auto_redteam( let (analysis, counterexample_json) = match parse_envelope(&response.text) { Some(pair) => pair, None => match super::super::extract_json(&response.text) { - Some(json) => (response.text.clone(), json), + Some(json) => match parse_envelope(&json) { + Some(pair) => pair, + None => (response.text.clone(), json), + }, None => { failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), @@ -157,6 +169,7 @@ fn parse_envelope(text: &str) -> Option<(String, String)> { fn build_redteam_prompt( invariant_description: &str, input_example: Option<&str>, + hint: Option<&str>, failed_attempts: &[FailedAttempt], ) -> String { let mut prompt = String::new(); @@ -192,6 +205,12 @@ fn build_redteam_prompt( prompt.push_str("\n```\n\n"); } + if let Some(hint) = hint { + prompt.push_str("## Hint\n\n"); + prompt.push_str(hint); + prompt.push_str("\n\n"); + } + if !failed_attempts.is_empty() { prompt.push_str("## Previous failed attempts\n\n"); prompt.push_str( diff --git a/jolt-eval/tests/agent_test.rs b/jolt-eval/tests/agent_test.rs index c0e2c4e00..f7fa0b5fc 100644 --- a/jolt-eval/tests/agent_test.rs +++ b/jolt-eval/tests/agent_test.rs @@ -218,7 +218,7 @@ fn envelope(analysis: &str, counterexample: impl serde::Serialize) -> String { fn redteam_no_violation_when_invariant_always_passes() { let invariant = AlwaysPassInvariant; let 
agent = MockAgent::always_ok(&envelope("I analyzed the code.", 42)); - let config = RedTeamConfig { num_iterations: 3 }; + let config = RedTeamConfig { num_iterations: 3, ..Default::default() }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -242,7 +242,7 @@ fn redteam_finds_violation_with_structured_response() { // AlwaysFailInvariant rejects every input. let invariant = AlwaysFailInvariant; let agent = MockAgent::always_ok(&envelope("I found a bug!", 99)); - let config = RedTeamConfig { num_iterations: 10 }; + let config = RedTeamConfig { num_iterations: 10, ..Default::default() }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -266,7 +266,7 @@ fn redteam_finds_violation_with_targeted_input() { // FailsOnZeroInvariant only fails for input 0. let invariant = FailsOnZeroInvariant; let agent = MockAgent::always_ok(&envelope("Try zero", 0)); - let config = RedTeamConfig { num_iterations: 5 }; + let config = RedTeamConfig { num_iterations: 5, ..Default::default() }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -287,7 +287,7 @@ fn redteam_finds_violation_with_targeted_input() { fn redteam_no_violation_when_agent_misses() { let invariant = FailsOnZeroInvariant; let agent = MockAgent::always_ok(&envelope("Trying 1", 1)); - let config = RedTeamConfig { num_iterations: 2 }; + let config = RedTeamConfig { num_iterations: 2, ..Default::default() }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -306,7 +306,7 @@ fn redteam_no_violation_when_agent_misses() { fn redteam_handles_agent_errors_gracefully() { let invariant = AlwaysPassInvariant; let agent = MockAgent::always_err("network timeout"); - let config = RedTeamConfig { num_iterations: 3 }; + let config = RedTeamConfig { num_iterations: 3, ..Default::default() }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -329,7 +329,7 @@ fn redteam_handles_no_json_in_response() { 
// Agent returns plain text (no envelope, no code block) let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok("I looked around but have no candidate to offer."); - let config = RedTeamConfig { num_iterations: 1 }; + let config = RedTeamConfig { num_iterations: 1, ..Default::default() }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -349,7 +349,7 @@ fn redteam_handles_invalid_counterexample_type() { // Structured envelope with wrong counterexample type for Input=u8 let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok(&envelope("Here", "not_a_number")); - let config = RedTeamConfig { num_iterations: 1 }; + let config = RedTeamConfig { num_iterations: 1, ..Default::default() }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -369,7 +369,7 @@ fn redteam_fallback_extracts_json_from_freeform_text() { // Agent doesn't return structured envelope, but has a code block let invariant = AlwaysFailInvariant; let agent = MockAgent::always_ok("Found it!\n```json\n77\n```"); - let config = RedTeamConfig { num_iterations: 1 }; + let config = RedTeamConfig { num_iterations: 1, ..Default::default() }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -385,7 +385,7 @@ fn redteam_fallback_extracts_json_from_freeform_text() { fn redteam_prompt_includes_invariant_description() { let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok(&envelope("ok", 0)); - let config = RedTeamConfig { num_iterations: 1 }; + let config = RedTeamConfig { num_iterations: 1, ..Default::default() }; auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -399,7 +399,7 @@ fn redteam_prompt_includes_invariant_description() { fn redteam_prompt_includes_input_example() { let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok(&envelope("ok", 0)); - let config = RedTeamConfig { num_iterations: 1 }; + let config = RedTeamConfig { num_iterations: 1, 
..Default::default() }; auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -412,7 +412,7 @@ fn redteam_prompt_includes_input_example() { fn redteam_prompt_includes_failed_attempts_after_first_iteration() { let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok(&envelope("Tried something", 42)); - let config = RedTeamConfig { num_iterations: 3 }; + let config = RedTeamConfig { num_iterations: 3, ..Default::default() }; auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -429,7 +429,7 @@ fn redteam_prompt_includes_failed_attempts_after_first_iteration() { fn redteam_zero_iterations_returns_immediately() { let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok("should not be called"); - let config = RedTeamConfig { num_iterations: 0 }; + let config = RedTeamConfig { num_iterations: 0, ..Default::default() }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -457,7 +457,7 @@ fn redteam_mixed_agent_responses() { diff: None, }), ]); - let config = RedTeamConfig { num_iterations: 3 }; + let config = RedTeamConfig { num_iterations: 3, ..Default::default() }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -546,7 +546,7 @@ fn custom_harness_plugs_into_auto_redteam() { }; let invariant = AlwaysPassInvariant; - let config = RedTeamConfig { num_iterations: 2 }; + let config = RedTeamConfig { num_iterations: 2, ..Default::default() }; let result = auto_redteam(&invariant, &config, &harness, Path::new("/tmp")); From ab109ef369f0e29cd7796cf75b13ddf8fe17c65b Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 18:14:44 -0400 Subject: [PATCH 28/86] feat(jolt-eval): add soundness invariant with guest-sandbox template The soundness invariant lets a red-team agent produce a unified diff against guest-sandbox/, which is applied in-place (with RAII revert), compiled via jolt build, proved honestly, then verified against the agent's dishonest (output, panic) 
claim. The verifier must reject. Adds TestCase::verify_with_claims to override output bytes and panic flag independently. Patch filtering drops hunks with '..' paths to prevent sandbox escapes. Includes filter_patch unit tests. Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/redteam.rs | 1 + jolt-eval/guest-sandbox/Cargo.toml | 10 + jolt-eval/guest-sandbox/README.md | 21 ++ jolt-eval/guest-sandbox/src/lib.rs | 8 + jolt-eval/guest-sandbox/src/main.rs | 5 + jolt-eval/macros/src/lib.rs | 2 +- jolt-eval/src/invariant/mod.rs | 4 + jolt-eval/src/invariant/soundness.rs | 339 +++++++++++++++++++++++++++ jolt-eval/src/lib.rs | 35 +++ jolt-eval/tests/integration.rs | 11 +- 10 files changed, 425 insertions(+), 11 deletions(-) create mode 100644 jolt-eval/guest-sandbox/Cargo.toml create mode 100644 jolt-eval/guest-sandbox/README.md create mode 100644 jolt-eval/guest-sandbox/src/lib.rs create mode 100644 jolt-eval/guest-sandbox/src/main.rs create mode 100644 jolt-eval/src/invariant/soundness.rs diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index 4afbbb638..2c59d7a67 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -82,6 +82,7 @@ fn main() -> eyre::Result<()> { JoltInvariants::SplitEqBindHighLow(inv) => { auto_redteam(inv, &config, &agent, &repo_dir) } + JoltInvariants::Soundness(inv) => auto_redteam(inv, &config, &agent, &repo_dir), }; match result { diff --git a/jolt-eval/guest-sandbox/Cargo.toml b/jolt-eval/guest-sandbox/Cargo.toml new file mode 100644 index 000000000..b5ab76e51 --- /dev/null +++ b/jolt-eval/guest-sandbox/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "sandbox-guest" +version = "0.1.0" +edition = "2021" + +[features] +guest = [] + +[dependencies] +jolt = { package = "jolt-sdk", path = "../../jolt-sdk" } diff --git a/jolt-eval/guest-sandbox/README.md b/jolt-eval/guest-sandbox/README.md new file mode 100644 index 000000000..6e028fa3e --- /dev/null +++ b/jolt-eval/guest-sandbox/README.md @@ -0,0 +1,21 @@ 
+# guest-sandbox + +Template guest program for the soundness invariant's red-team harness. + +During a red-team session (`cargo run --bin redteam -- --invariant soundness`), an AI agent produces a **unified diff** against this directory. The harness applies the filtered patch to this directory in place (reverting it with `git checkout` once the check finishes), compiles the patched guest with `jolt build`, then proves execution and checks that the verifier rejects any dishonest output/panic claim. + +The default guest is an identity function (`input → input`). The agent's goal is to patch it into a program that exposes a soundness bug in Jolt — i.e. one where the verifier accepts a proof paired with an incorrect output or panic flag. + +## Structure + +``` +guest-sandbox/ +├── Cargo.toml # depends on jolt-sdk +└── src/ + ├── lib.rs # #[jolt::provable] function (patch target) + └── main.rs # no_main entry point (rarely needs patching) +``` + +## Safety + +The harness filters patches before applying them: any diff hunk referencing a path containing `..` is dropped, so the agent cannot modify files outside this directory. diff --git a/jolt-eval/guest-sandbox/src/lib.rs b/jolt-eval/guest-sandbox/src/lib.rs new file mode 100644 index 000000000..c3b8403c0 --- /dev/null +++ b/jolt-eval/guest-sandbox/src/lib.rs @@ -0,0 +1,8 @@ +#![cfg_attr(feature = "guest", no_std)] + +#[jolt::provable(heap_size = 32768, stack_size = 65536, max_trace_length = 1048576)] +fn sandbox(input: &[u8]) -> Vec { + // Identity function — the red-team agent patches this to explore + // code paths that might break soundness.
+ input.to_vec() +} diff --git a/jolt-eval/guest-sandbox/src/main.rs b/jolt-eval/guest-sandbox/src/main.rs new file mode 100644 index 000000000..6d8f0a47b --- /dev/null +++ b/jolt-eval/guest-sandbox/src/main.rs @@ -0,0 +1,5 @@ +#![cfg_attr(feature = "guest", no_std)] +#![no_main] + +#[allow(unused_imports)] +use sandbox_guest::*; diff --git a/jolt-eval/macros/src/lib.rs b/jolt-eval/macros/src/lib.rs index 5fa573131..d10ab1653 100644 --- a/jolt-eval/macros/src/lib.rs +++ b/jolt-eval/macros/src/lib.rs @@ -45,7 +45,7 @@ pub fn invariant(attr: TokenStream, item: TokenStream) -> TokenStream { } else { let first = &target_exprs[0]; let rest = &target_exprs[1..]; - quote! { #first #(| #rest)* } + quote! { enumset::EnumSet::only(#first) #(| #rest)* } }; let targets_impl = quote! { diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index 633872a4d..e29f6817f 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -1,3 +1,4 @@ +pub mod soundness; pub mod split_eq_bind; pub mod synthesis; @@ -97,6 +98,7 @@ pub trait InvariantTargets { pub enum JoltInvariants { SplitEqBindLowHigh(split_eq_bind::SplitEqBindLowHighInvariant), SplitEqBindHighLow(split_eq_bind::SplitEqBindHighLowInvariant), + Soundness(soundness::SoundnessInvariant), } macro_rules! dispatch { @@ -104,6 +106,7 @@ macro_rules! 
dispatch { match $self { JoltInvariants::SplitEqBindLowHigh($inv) => $body, JoltInvariants::SplitEqBindHighLow($inv) => $body, + JoltInvariants::Soundness($inv) => $body, } }; } @@ -113,6 +116,7 @@ impl JoltInvariants { vec![ Self::SplitEqBindLowHigh(split_eq_bind::SplitEqBindLowHighInvariant), Self::SplitEqBindHighLow(split_eq_bind::SplitEqBindHighLowInvariant), + Self::Soundness(soundness::SoundnessInvariant), ] } diff --git a/jolt-eval/src/invariant/soundness.rs b/jolt-eval/src/invariant/soundness.rs new file mode 100644 index 000000000..ca3a129c2 --- /dev/null +++ b/jolt-eval/src/invariant/soundness.rs @@ -0,0 +1,339 @@ +use std::path::{Path, PathBuf}; +use std::process::Command; + +use arbitrary::{Arbitrary, Unstructured}; + +use common::constants::{DEFAULT_MAX_TRUSTED_ADVICE_SIZE, DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE}; +use common::jolt_device::MemoryConfig; + +use super::{Invariant, InvariantViolation}; +use crate::TestCase; + +/// Input for the soundness invariant. +/// +/// The red-team agent produces a `patch` (unified diff) to apply to +/// the `guest-sandbox/` template project, along with the program input +/// and a dishonest claim about the output and panic flag. +/// +/// The invariant proves the patched guest honestly, then checks that +/// the verifier rejects the dishonest claim. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] +pub struct SoundnessInput { + /// Unified diff to apply to `guest-sandbox/`. + /// Only hunks touching files within the sandbox are applied. + pub patch: String, + /// Input bytes fed to the guest program. + pub program_input: Vec, + /// The output the malicious prover claims. + pub claimed_output: Vec, + /// The panic flag the malicious prover claims. + pub claimed_panic: bool, +} + +impl<'a> Arbitrary<'a> for SoundnessInput { + fn arbitrary(_u: &mut Unstructured<'a>) -> arbitrary::Result { + // Soundness is RedTeam-only; Arbitrary is not meaningful. 
+ Err(arbitrary::Error::IncorrectFormat) + } +} + +/// Cached paths resolved once during setup. +pub struct SoundnessSetup { + sandbox_dir: PathBuf, +} + +#[jolt_eval_macros::invariant(RedTeam)] +#[derive(Default)] +pub struct SoundnessInvariant; + +impl Invariant for SoundnessInvariant { + type Setup = SoundnessSetup; + type Input = SoundnessInput; + + fn name(&self) -> &str { + "soundness" + } + + fn description(&self) -> String { + "For any deterministic guest program (no advice) and fixed input, \ + there is only one (output, panic) pair that the verifier accepts. \ + A counterexample is a guest patch + input + dishonest (output, panic) \ + claim that the verifier incorrectly accepts." + .to_string() + } + + fn setup(&self) -> SoundnessSetup { + let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + SoundnessSetup { + sandbox_dir: manifest_dir.join("guest-sandbox"), + } + } + + fn check( + &self, + setup: &SoundnessSetup, + input: SoundnessInput, + ) -> Result<(), InvariantViolation> { + // 1. Apply patch to sandbox in-place, revert on exit + let _guard = apply_patch(&setup.sandbox_dir, &input.patch)?; + + // 2. Compile the patched guest + let elf_bytes = compile_guest(&setup.sandbox_dir)?; + + // NOTE: `_guard` lives until `check` returns, so the patch is reverted only then. + + // 3. Build a TestCase and prove + let memory_config = MemoryConfig { + max_input_size: 4096, + max_output_size: 4096, + max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, + max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE, + stack_size: 65536, + heap_size: 32768, + program_size: None, + }; + let test_case = TestCase { + elf_contents: elf_bytes, + memory_config, + max_trace_length: 1048576, + }; + let prover_pp = test_case.prover_preprocessing(); + let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); + let (proof, honest_device) = test_case.prove(&prover_pp, &input.program_input); + + // 4.
Skip no-op claims (the claim matches the honest execution) + if input.claimed_output == honest_device.outputs + && input.claimed_panic == honest_device.panic + { + return Ok(()); + } + + // 5. Verify with the dishonest claim — this SHOULD fail + match TestCase::verify_with_claims( + &verifier_pp, + proof, + &honest_device.inputs, + &input.claimed_output, + input.claimed_panic, + ) { + Ok(()) => Err(InvariantViolation::with_details( + "Verifier accepted dishonest claim", + format!( + "honest_output={} bytes (panic={}), claimed_output={} bytes (panic={})", + honest_device.outputs.len(), + honest_device.panic, + input.claimed_output.len(), + input.claimed_panic, + ), + )), + Err(_) => Ok(()), + } + } + + fn seed_corpus(&self) -> Vec { + vec![SoundnessInput { + patch: String::new(), + program_input: vec![1, 2, 3], + claimed_output: vec![0xFF], + claimed_panic: false, + }] + } +} + +/// RAII guard that reverts a patch on drop via `git checkout`. +struct PatchGuard { + dir: PathBuf, + applied: bool, +} + +impl Drop for PatchGuard { + fn drop(&mut self) { + if self.applied { + let _ = Command::new("git") + .current_dir(&self.dir) + .args(["checkout", "."]) + .status(); + } + } +} + +/// Apply a filtered patch to `sandbox_dir` in-place. Returns a guard +/// that reverts the changes on drop (even on panic). 
+fn apply_patch(sandbox_dir: &Path, patch: &str) -> Result { + let guard = PatchGuard { + dir: sandbox_dir.to_path_buf(), + applied: false, + }; + + if patch.trim().is_empty() { + return Ok(guard); + } + + let safe_patch = filter_patch(patch); + if safe_patch.trim().is_empty() { + return Ok(guard); + } + + let mut child = Command::new("git") + .current_dir(sandbox_dir) + .args(["apply", "--allow-empty", "-"]) + .stdin(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .spawn() + .map_err(|e| InvariantViolation::new(format!("git apply spawn: {e}")))?; + + if let Some(stdin) = child.stdin.as_mut() { + use std::io::Write; + let _ = stdin.write_all(safe_patch.as_bytes()); + } + + let output = child + .wait_with_output() + .map_err(|e| InvariantViolation::new(format!("git apply wait: {e}")))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(InvariantViolation::with_details( + "Patch failed to apply", + stderr.to_string(), + )); + } + + Ok(PatchGuard { + dir: sandbox_dir.to_path_buf(), + applied: true, + }) +} + +/// Remove diff hunks that reference paths containing `..` to prevent +/// escaping the sandbox. +pub fn filter_patch(patch: &str) -> String { + let mut result = String::new(); + let mut include_hunk = true; + + for line in patch.lines() { + if line.starts_with("diff --git") || line.starts_with("--- ") || line.starts_with("+++ ") + { + include_hunk = !line.contains(".."); + } + if include_hunk { + result.push_str(line); + result.push('\n'); + } + } + + result +} + +/// Compile the guest and return the ELF bytes. 
+fn compile_guest(sandbox_dir: &Path) -> Result, InvariantViolation> { + let jolt_cmd = std::env::var("JOLT_PATH").unwrap_or_else(|_| "jolt".to_string()); + let target_dir = sandbox_dir.join("target"); + + let output = Command::new(&jolt_cmd) + .args([ + "build", + "-p", + "sandbox-guest", + "--", + "--release", + "--target-dir", + ]) + .arg(target_dir.as_os_str()) + .arg("--features") + .arg("guest") + .current_dir(sandbox_dir) + .output() + .map_err(|e| { + InvariantViolation::new(format!( + "jolt build: {e}. Make sure `jolt` is installed (cargo install --path .)" + )) + })?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(InvariantViolation::with_details( + "Guest compilation failed", + stderr.to_string(), + )); + } + + let elf_path = target_dir + .join("riscv64imac-unknown-none-elf") + .join("release") + .join("sandbox-guest"); + + std::fs::read(&elf_path).map_err(|e| { + InvariantViolation::with_details( + "ELF not found after compilation", + format!("{}: {e}", elf_path.display()), + ) + }) +} + +#[cfg(test)] +mod tests { + use super::filter_patch; + + #[test] + fn keeps_safe_hunks() { + let patch = "\ +diff --git a/src/lib.rs b/src/lib.rs +--- a/src/lib.rs ++++ b/src/lib.rs +@@ -1,3 +1,3 @@ +-fn foo() {} ++fn bar() {} +"; + let filtered = filter_patch(patch); + assert!(filtered.contains("+fn bar() {}")); + } + + #[test] + fn drops_hunks_with_path_traversal() { + let patch = "\ +diff --git a/../../jolt-core/src/lib.rs b/../../jolt-core/src/lib.rs +--- a/../../jolt-core/src/lib.rs ++++ b/../../jolt-core/src/lib.rs +@@ -1 +1 @@ +-safe ++malicious +"; + let filtered = filter_patch(patch); + assert!(!filtered.contains("malicious")); + } + + #[test] + fn mixed_safe_and_unsafe_hunks() { + let patch = "\ +diff --git a/src/lib.rs b/src/lib.rs +--- a/src/lib.rs ++++ b/src/lib.rs +@@ -1 +1 @@ +-old ++new +diff --git a/../../../etc/passwd b/../../../etc/passwd +--- a/../../../etc/passwd ++++ b/../../../etc/passwd +@@ 
-1 +1 @@ +-root ++hacked +diff --git a/Cargo.toml b/Cargo.toml +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -1 +1 @@ +-v1 ++v2 +"; + let filtered = filter_patch(patch); + assert!(filtered.contains("+new")); + assert!(!filtered.contains("hacked")); + assert!(filtered.contains("+v2")); + } + + #[test] + fn empty_patch_stays_empty() { + assert!(filter_patch("").is_empty()); + assert!(filter_patch(" \n ").trim().is_empty()); + } +} diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index 347b81a4a..785b95bfd 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -111,6 +111,41 @@ impl TestCase { verifier_pp, ) } + + /// Verify a proof against claimed (potentially malicious) outputs and panic flag. + /// + /// Unlike [`verify`], this lets the caller override the output bytes and + /// panic flag independently, for testing that the verifier rejects + /// dishonest claims. + pub fn verify_with_claims( + verifier_pp: &VerifierPreprocessing, + proof: Proof, + inputs: &[u8], + claimed_outputs: &[u8], + claimed_panic: bool, + ) -> Result<(), ProofVerifyError> { + use common::jolt_device::MemoryConfig; + use jolt_core::zkvm::verifier::JoltVerifier; + + let memory_layout = &verifier_pp.shared.memory_layout; + let memory_config = MemoryConfig { + max_untrusted_advice_size: memory_layout.max_untrusted_advice_size, + max_trusted_advice_size: memory_layout.max_trusted_advice_size, + max_input_size: memory_layout.max_input_size, + max_output_size: memory_layout.max_output_size, + stack_size: memory_layout.stack_size, + heap_size: memory_layout.heap_size, + program_size: Some(memory_layout.program_size), + }; + let mut io_device = JoltDevice::new(&memory_config); + io_device.inputs = inputs.to_vec(); + io_device.outputs = claimed_outputs.to_vec(); + io_device.panic = claimed_panic; + + let verifier = + JoltVerifier::::new(verifier_pp, proof, io_device, None, None)?; + verifier.verify() + } } /// Serialize a proof to bytes. 
diff --git a/jolt-eval/tests/integration.rs b/jolt-eval/tests/integration.rs index e63f643b3..739b69d20 100644 --- a/jolt-eval/tests/integration.rs +++ b/jolt-eval/tests/integration.rs @@ -1,4 +1,4 @@ -use jolt_eval::invariant::{Invariant, InvariantTargets, InvariantViolation, JoltInvariants}; +use jolt_eval::invariant::{Invariant, InvariantTargets, InvariantViolation}; use jolt_eval::objective::{AbstractObjective, Direction, MeasurementError}; /// A trivial invariant for testing the framework itself. @@ -92,15 +92,6 @@ fn test_failing_invariant_reports_violations() { } } -#[test] -fn test_jolt_invariants_all() { - let all = JoltInvariants::all(); - assert_eq!(all.len(), 2); - let names: Vec<_> = all.iter().map(|inv| inv.name()).collect(); - assert!(names.contains(&"split_eq_bind_low_high")); - assert!(names.contains(&"split_eq_bind_high_low")); -} - #[test] fn test_constant_objective() { let obj = ConstantObjective { From 58cbc007c6508ea7221c2f4ddcfe140587248c25 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 18:49:01 -0400 Subject: [PATCH 29/86] refactor(jolt-eval): add CheckError, refactor guests into directory Change Invariant::check to return Result<(), CheckError> where CheckError has Violation and InvalidInput variants. InvalidInput is silently skipped in fuzz targets, macro-generated tests, and run_checks; recorded as a failed attempt in redteam. Also refactor guests.rs into guests/ directory (mod.rs + catalog.rs) and add GuestMemoryConfig to SoundnessInput with validation limits. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/macros/src/lib.rs | 35 ++-- .../src/{guests.rs => guests/catalog.rs} | 20 +- jolt-eval/src/guests/mod.rs | 158 +++++++++++++++ jolt-eval/src/invariant/mod.rs | 36 +++- jolt-eval/src/invariant/soundness.rs | 186 +++++++++++------- jolt-eval/src/invariant/split_eq_bind.rs | 26 +-- jolt-eval/src/invariant/synthesis/fuzz.rs | 8 +- jolt-eval/src/invariant/synthesis/redteam.rs | 41 ++-- jolt-eval/src/lib.rs | 180 +---------------- jolt-eval/tests/agent_test.rs | 18 +- jolt-eval/tests/integration.rs | 10 +- jolt-eval/tests/macro_test.rs | 10 +- 12 files changed, 399 insertions(+), 329 deletions(-) rename jolt-eval/src/{guests.rs => guests/catalog.rs} (85%) create mode 100644 jolt-eval/src/guests/mod.rs diff --git a/jolt-eval/macros/src/lib.rs b/jolt-eval/macros/src/lib.rs index d10ab1653..c577e4c08 100644 --- a/jolt-eval/macros/src/lib.rs +++ b/jolt-eval/macros/src/lib.rs @@ -63,17 +63,29 @@ pub fn invariant(attr: TokenStream, item: TokenStream) -> TokenStream { use super::*; use jolt_eval::Invariant; + fn assert_no_violation( + name: &str, + result: Result<(), jolt_eval::CheckError>, + context: &str, + ) { + match result { + Ok(()) | Err(jolt_eval::CheckError::InvalidInput(_)) => {} + Err(jolt_eval::CheckError::Violation(v)) => { + panic!("Invariant '{name}' violated {context}: {v}"); + } + } + } + #[test] fn seed_corpus() { let invariant = #struct_name::default(); let setup = invariant.setup(); for (i, input) in invariant.seed_corpus().into_iter().enumerate() { - invariant.check(&setup, input).unwrap_or_else(|e| { - panic!( - "Invariant '{}' violated on seed {}: {}", - invariant.name(), i, e - ); - }); + assert_no_violation( + invariant.name(), + invariant.check(&setup, input), + &format!("on seed {i}"), + ); } } @@ -95,12 +107,11 @@ pub fn invariant(attr: TokenStream, item: TokenStream) -> TokenStream { <#struct_name as jolt_eval::Invariant>::Input as jolt_eval::arbitrary::Arbitrary >::arbitrary(&mut u) { - 
invariant.check(&setup, input).unwrap_or_else(|e| { - panic!( - "Invariant '{}' violated: {}", - invariant.name(), e - ); - }); + assert_no_violation( + invariant.name(), + invariant.check(&setup, input), + "", + ); } } } diff --git a/jolt-eval/src/guests.rs b/jolt-eval/src/guests/catalog.rs similarity index 85% rename from jolt-eval/src/guests.rs rename to jolt-eval/src/guests/catalog.rs index 5f5ab86ea..18901669d 100644 --- a/jolt-eval/src/guests.rs +++ b/jolt-eval/src/guests/catalog.rs @@ -2,8 +2,9 @@ use std::sync::Arc; use common::constants::{DEFAULT_MAX_TRUSTED_ADVICE_SIZE, DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE}; use common::jolt_device::MemoryConfig; +use jolt_core::host::Program; -use crate::TestCase; +use super::TestCase; /// A known guest program that jolt-eval can compile and run. pub struct GuestSpec { @@ -33,12 +34,8 @@ impl GuestSpec { } } - /// Compile the guest and return a `TestCase`. - /// - /// Invokes the `jolt` CLI to cross-compile the guest crate to - /// RISC-V, then wraps the resulting ELF bytes in a `TestCase`. pub fn compile(&self, target_dir: &str) -> TestCase { - let mut program = jolt_core::host::Program::new(self.package); + let mut program = Program::new(self.package); program.set_memory_config(self.memory_config()); program.build(target_dir); let elf_bytes = program @@ -52,11 +49,6 @@ impl GuestSpec { } } -/// The fixed catalog of guest programs available for evaluation. -/// -/// Modeled after the benchmark suite in `jolt-core/benches/e2e_profiling.rs`. -/// Each entry carries the memory config and default inputs extracted from -/// the `#[jolt::provable(...)]` attributes in the guest crate. pub static GUESTS: &[GuestSpec] = &[ GuestSpec { package: "muldiv-guest", @@ -120,21 +112,15 @@ pub static GUESTS: &[GuestSpec] = &[ }, ]; -/// Look up a guest by its short name. pub fn find_guest(name: &str) -> Option<&'static GuestSpec> { GUESTS.iter().find(|g| g.name == name) } -/// Return the short names of all known guests. 
pub fn guest_names() -> Vec<&'static str> { GUESTS.iter().map(|g| g.name).collect() } /// Resolve a `TestCase` from either `--guest ` or `--elf `. -/// -/// If `guest` is `Some`, compiles the named guest. If `elf` is `Some`, -/// reads the ELF from disk with a default memory config. Exits the -/// process with a helpful message if neither is provided. pub fn resolve_test_case( guest: Option<&str>, elf: Option<&str>, diff --git a/jolt-eval/src/guests/mod.rs b/jolt-eval/src/guests/mod.rs new file mode 100644 index 000000000..93dcb0548 --- /dev/null +++ b/jolt-eval/src/guests/mod.rs @@ -0,0 +1,158 @@ +mod catalog; + +use std::sync::Arc; + +use ark_bn254::Fr; +use ark_serialize::{CanonicalDeserialize, CanonicalSerialize}; +use jolt_core::curve::Bn254Curve; +use jolt_core::poly::commitment::dory::DoryCommitmentScheme; +use jolt_core::transcripts::Blake2bTranscript; + +pub use catalog::{find_guest, guest_names, resolve_test_case, GuestSpec, GUESTS}; +pub use jolt_core::guest::program::Program as GuestProgram; +pub use jolt_core::poly::commitment::commitment_scheme::CommitmentScheme; +pub use jolt_core::utils::errors::ProofVerifyError; +pub use jolt_core::zkvm::Serializable; +pub use tracer::JoltDevice; + +pub type F = Fr; +pub type C = Bn254Curve; +pub type PCS = DoryCommitmentScheme; +pub type FS = Blake2bTranscript; + +pub type Proof = jolt_core::zkvm::proof_serialization::JoltProof; +pub type ProverPreprocessing = jolt_core::zkvm::prover::JoltProverPreprocessing; +pub type VerifierPreprocessing = jolt_core::zkvm::verifier::JoltVerifierPreprocessing; + +/// A self-contained test case wrapping a compiled guest program. +/// +/// Stores raw ELF bytes and memory configuration so it can reconstruct +/// a `GuestProgram` on demand without requiring `Clone` on the program. 
+pub struct TestCase { + pub elf_contents: Vec, + pub memory_config: common::jolt_device::MemoryConfig, + pub max_trace_length: usize, +} + +impl TestCase { + pub fn new(program: GuestProgram, max_trace_length: usize) -> Self { + Self { + elf_contents: program.elf_contents, + memory_config: program.memory_config, + max_trace_length, + } + } + + pub fn make_program(&self) -> GuestProgram { + GuestProgram::new(&self.elf_contents, &self.memory_config) + } + + pub fn prover_preprocessing(&self) -> ProverPreprocessing { + let program = self.make_program(); + jolt_core::guest::prover::preprocess(&program, self.max_trace_length) + .expect("prover preprocessing failed") + } + + pub fn verifier_preprocessing(prover_pp: &ProverPreprocessing) -> VerifierPreprocessing { + VerifierPreprocessing::from(prover_pp) + } + + pub fn prove(&self, prover_pp: &ProverPreprocessing, inputs: &[u8]) -> (Proof, JoltDevice) { + let program = self.make_program(); + let mut output_bytes = vec![0u8; self.memory_config.max_output_size as usize]; + let (proof, io_device, _debug) = jolt_core::guest::prover::prove::( + &program, + inputs, + &[], + &[], + None, + None, + &mut output_bytes, + prover_pp, + ); + (proof, io_device) + } + + pub fn verify( + verifier_pp: &VerifierPreprocessing, + proof: Proof, + io_device: &JoltDevice, + ) -> Result<(), ProofVerifyError> { + jolt_core::guest::verifier::verify::( + &io_device.inputs, + None, + &io_device.outputs, + proof, + verifier_pp, + ) + } + + /// Verify a proof against claimed (potentially malicious) outputs and panic flag. 
+ pub fn verify_with_claims( + verifier_pp: &VerifierPreprocessing, + proof: Proof, + inputs: &[u8], + claimed_outputs: &[u8], + claimed_panic: bool, + ) -> Result<(), ProofVerifyError> { + use common::jolt_device::MemoryConfig; + use jolt_core::zkvm::verifier::JoltVerifier; + + let memory_layout = &verifier_pp.shared.memory_layout; + let memory_config = MemoryConfig { + max_untrusted_advice_size: memory_layout.max_untrusted_advice_size, + max_trusted_advice_size: memory_layout.max_trusted_advice_size, + max_input_size: memory_layout.max_input_size, + max_output_size: memory_layout.max_output_size, + stack_size: memory_layout.stack_size, + heap_size: memory_layout.heap_size, + program_size: Some(memory_layout.program_size), + }; + let mut io_device = JoltDevice::new(&memory_config); + io_device.inputs = inputs.to_vec(); + io_device.outputs = claimed_outputs.to_vec(); + io_device.panic = claimed_panic; + + let verifier = + JoltVerifier::::new(verifier_pp, proof, io_device, None, None)?; + verifier.verify() + } +} + +/// Serialize a proof to bytes. +pub fn serialize_proof(proof: &Proof) -> Vec { + let mut buf = Vec::new(); + proof + .serialize_compressed(&mut buf) + .expect("proof serialization failed"); + buf +} + +/// Deserialize a proof from bytes. +pub fn deserialize_proof(bytes: &[u8]) -> Result { + Proof::deserialize_compressed(bytes) +} + +/// Shared setup reusable across multiple invariants/objectives +/// operating on the same program. 
+pub struct SharedSetup { + pub test_case: Arc, + pub prover_preprocessing: Arc, + pub verifier_preprocessing: Arc, +} + +impl SharedSetup { + pub fn new(test_case: TestCase) -> Self { + Self::new_from_arc(Arc::new(test_case)) + } + + pub fn new_from_arc(test_case: Arc) -> Self { + let prover_pp = test_case.prover_preprocessing(); + let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); + Self { + test_case, + prover_preprocessing: Arc::new(prover_pp), + verifier_preprocessing: Arc::new(verifier_pp), + } + } +} diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index e29f6817f..0be7098dd 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -54,6 +54,26 @@ impl InvariantViolation { } } +/// Result of checking an invariant against a single input. +#[derive(Debug)] +pub enum CheckError { + /// The invariant was violated. + Violation(InvariantViolation), + /// The input is degenerate or uninteresting and should be skipped. + InvalidInput(String), +} + +impl fmt::Display for CheckError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Violation(v) => write!(f, "violation: {v}"), + Self::InvalidInput(msg) => write!(f, "invalid input: {msg}"), + } + } +} + +impl std::error::Error for CheckError {} + /// Core invariant trait. Each invariant defines a setup phase (run once) /// and a check phase (run per input). The `Input` type must support /// `Arbitrary` for fuzzing, and `Serialize`/`DeserializeOwned` so an AI @@ -76,7 +96,11 @@ pub trait Invariant: Send + Sync { fn setup(&self) -> Self::Setup; /// Check the invariant for a single input against the pre-computed setup. - fn check(&self, setup: &Self::Setup, input: Self::Input) -> Result<(), InvariantViolation>; + /// + /// Returns `Ok(())` if the invariant holds, `Err(CheckError::Violation)` + /// if it is violated, or `Err(CheckError::InvalidInput)` if the input + /// is degenerate and should be skipped. 
+ fn check(&self, setup: &Self::Setup, input: Self::Input) -> Result<(), CheckError>; /// Known-interesting inputs for deterministic test generation. fn seed_corpus(&self) -> Vec { @@ -144,8 +168,14 @@ fn run_checks_impl( let setup = inv.setup(); let mut results = Vec::new(); + let mut record = |r: Result<(), CheckError>| match r { + Ok(()) => results.push(Ok(())), + Err(CheckError::Violation(v)) => results.push(Err(v)), + Err(CheckError::InvalidInput(_)) => {} + }; + for input in inv.seed_corpus() { - results.push(inv.check(&setup, input)); + record(inv.check(&setup, input)); } let mut rng = rand::thread_rng(); @@ -154,7 +184,7 @@ fn run_checks_impl( rng.fill_bytes(&mut raw); let mut u = arbitrary::Unstructured::new(&raw); if let Ok(input) = I::Input::arbitrary(&mut u) { - results.push(inv.check(&setup, input)); + record(inv.check(&setup, input)); } } diff --git a/jolt-eval/src/invariant/soundness.rs b/jolt-eval/src/invariant/soundness.rs index ca3a129c2..72890c133 100644 --- a/jolt-eval/src/invariant/soundness.rs +++ b/jolt-eval/src/invariant/soundness.rs @@ -5,10 +5,81 @@ use arbitrary::{Arbitrary, Unstructured}; use common::constants::{DEFAULT_MAX_TRUSTED_ADVICE_SIZE, DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE}; use common::jolt_device::MemoryConfig; +use jolt_core::host::Program; -use super::{Invariant, InvariantViolation}; +use super::{CheckError, Invariant, InvariantViolation}; use crate::TestCase; +/// Guest memory layout parameters. +/// +/// Serializable mirror of `common::jolt_device::MemoryConfig` for use +/// in JSON-based counterexamples. 
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
+pub struct GuestMemoryConfig {
+    pub max_input_size: u64,
+    pub max_output_size: u64,
+    pub stack_size: u64,
+    pub heap_size: u64,
+    pub max_trace_length: usize,
+}
+
+impl Default for GuestMemoryConfig {
+    fn default() -> Self {
+        Self {
+            max_input_size: 4096,
+            max_output_size: 4096,
+            stack_size: 65536,
+            heap_size: 32768,
+            max_trace_length: 1048576,
+        }
+    }
+}
+
+/// Per-field upper bounds that stop the red-team agent requesting absurd resource usage.
+const MAX_INPUT_SIZE: u64 = 1 << 16;
+const MAX_OUTPUT_SIZE: u64 = 1 << 16;
+const MAX_STACK_SIZE: u64 = 1 << 16;
+const MAX_HEAP_SIZE: u64 = 1 << 20;
+const MAX_TRACE_LENGTH: usize = 1 << 20; // ~1M steps
+
+impl GuestMemoryConfig {
+    /// Returns `CheckError::InvalidInput` if any field exceeds its own `MAX_*` bound.
+    fn validate(&self) -> Result<(), CheckError> {
+        if self.max_input_size > MAX_INPUT_SIZE
+            || self.max_output_size > MAX_OUTPUT_SIZE
+            || self.stack_size > MAX_STACK_SIZE
+            || self.heap_size > MAX_HEAP_SIZE
+            || self.max_trace_length > MAX_TRACE_LENGTH
+        {
+            return Err(CheckError::InvalidInput(format!(
+                "memory config exceeds limits: \
+                 input={}, output={}, stack={}, heap={}, trace={}; \
+                 limits: input<={MAX_INPUT_SIZE}, output<={MAX_OUTPUT_SIZE}, \
+                 stack<={MAX_STACK_SIZE}, heap<={MAX_HEAP_SIZE}, trace<={MAX_TRACE_LENGTH}",
+                self.max_input_size,
+                self.max_output_size,
+                self.stack_size,
+                self.heap_size,
+                self.max_trace_length,
+            )));
+        }
+        Ok(())
+    }
+
+    /// Builds a `MemoryConfig` with default advice sizes and `program_size` left unset.
+    fn to_memory_config(&self) -> MemoryConfig {
+        MemoryConfig {
+            max_input_size: self.max_input_size,
+            max_output_size: self.max_output_size,
+            max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE,
+            max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE,
+            stack_size: self.stack_size,
+            heap_size: self.heap_size,
+            program_size: None,
+        }
+    }
+}
+
 /// Input for the soundness invariant.
/// /// The red-team agent produces a `patch` (unified diff) to apply to @@ -22,6 +93,9 @@ pub struct SoundnessInput { /// Unified diff to apply to `guest-sandbox/`. /// Only hunks touching files within the sandbox are applied. pub patch: String, + /// Guest memory layout. Defaults are reasonable for most programs. + #[serde(default)] + pub memory: GuestMemoryConfig, /// Input bytes fed to the guest program. pub program_input: Vec, /// The output the malicious prover claims. @@ -73,42 +147,39 @@ impl Invariant for SoundnessInvariant { &self, setup: &SoundnessSetup, input: SoundnessInput, - ) -> Result<(), InvariantViolation> { - // 1. Apply patch to sandbox in-place, revert on exit + ) -> Result<(), CheckError> { + // 1. Validate memory config + input.memory.validate()?; + let memory_config = input.memory.to_memory_config(); + + // 2. Apply patch to sandbox in-place, revert on exit let _guard = apply_patch(&setup.sandbox_dir, &input.patch)?; - // 2. Compile the patched guest - let elf_bytes = compile_guest(&setup.sandbox_dir)?; + // 3. Compile the patched guest + let elf_bytes = compile_guest(&setup.sandbox_dir, &memory_config)?; // _guard drops here (or on early return), reverting the patch - // 3. Build a TestCase and prove - let memory_config = MemoryConfig { - max_input_size: 4096, - max_output_size: 4096, - max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, - max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE, - stack_size: 65536, - heap_size: 32768, - program_size: None, - }; + // 4. Build a TestCase and prove let test_case = TestCase { elf_contents: elf_bytes, memory_config, - max_trace_length: 1048576, + max_trace_length: input.memory.max_trace_length, }; let prover_pp = test_case.prover_preprocessing(); let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); let (proof, honest_device) = test_case.prove(&prover_pp, &input.program_input); - // 4. Skip no-op claims (the claim matches the honest execution) + // 5. 
Skip no-op claims (the claim matches the honest execution) if input.claimed_output == honest_device.outputs && input.claimed_panic == honest_device.panic { - return Ok(()); + return Err(CheckError::InvalidInput( + "claimed output/panic matches honest execution".into(), + )); } - // 5. Verify with the dishonest claim — this SHOULD fail + // 6. Verify with the dishonest claim — this SHOULD fail match TestCase::verify_with_claims( &verifier_pp, proof, @@ -116,7 +187,7 @@ impl Invariant for SoundnessInvariant { &input.claimed_output, input.claimed_panic, ) { - Ok(()) => Err(InvariantViolation::with_details( + Ok(()) => Err(CheckError::Violation(InvariantViolation::with_details( "Verifier accepted dishonest claim", format!( "honest_output={} bytes (panic={}), claimed_output={} bytes (panic={})", @@ -125,7 +196,7 @@ impl Invariant for SoundnessInvariant { input.claimed_output.len(), input.claimed_panic, ), - )), + ))), Err(_) => Ok(()), } } @@ -133,6 +204,7 @@ impl Invariant for SoundnessInvariant { fn seed_corpus(&self) -> Vec { vec![SoundnessInput { patch: String::new(), + memory: GuestMemoryConfig::default(), program_input: vec![1, 2, 3], claimed_output: vec![0xFF], claimed_panic: false, @@ -159,7 +231,7 @@ impl Drop for PatchGuard { /// Apply a filtered patch to `sandbox_dir` in-place. Returns a guard /// that reverts the changes on drop (even on panic). 
-fn apply_patch(sandbox_dir: &Path, patch: &str) -> Result { +fn apply_patch(sandbox_dir: &Path, patch: &str) -> Result { let guard = PatchGuard { dir: sandbox_dir.to_path_buf(), applied: false, @@ -180,7 +252,7 @@ fn apply_patch(sandbox_dir: &Path, patch: &str) -> Result Result String { let mut include_hunk = true; for line in patch.lines() { - if line.starts_with("diff --git") || line.starts_with("--- ") || line.starts_with("+++ ") - { + if line.starts_with("diff --git") || line.starts_with("--- ") || line.starts_with("+++ ") { include_hunk = !line.contains(".."); } if include_hunk { @@ -225,50 +295,18 @@ pub fn filter_patch(patch: &str) -> String { result } -/// Compile the guest and return the ELF bytes. -fn compile_guest(sandbox_dir: &Path) -> Result, InvariantViolation> { - let jolt_cmd = std::env::var("JOLT_PATH").unwrap_or_else(|_| "jolt".to_string()); - let target_dir = sandbox_dir.join("target"); - - let output = Command::new(&jolt_cmd) - .args([ - "build", - "-p", - "sandbox-guest", - "--", - "--release", - "--target-dir", - ]) - .arg(target_dir.as_os_str()) - .arg("--features") - .arg("guest") - .current_dir(sandbox_dir) - .output() - .map_err(|e| { - InvariantViolation::new(format!( - "jolt build: {e}. Make sure `jolt` is installed (cargo install --path .)" - )) - })?; - - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - return Err(InvariantViolation::with_details( - "Guest compilation failed", - stderr.to_string(), - )); - } - - let elf_path = target_dir - .join("riscv64imac-unknown-none-elf") - .join("release") - .join("sandbox-guest"); - - std::fs::read(&elf_path).map_err(|e| { - InvariantViolation::with_details( - "ELF not found after compilation", - format!("{}: {e}", elf_path.display()), - ) - }) +/// Compile the sandbox guest and return the ELF bytes. 
+fn compile_guest( + sandbox_dir: &Path, + memory_config: &MemoryConfig, +) -> Result, CheckError> { + let target_dir = sandbox_dir.join("target").to_string_lossy().to_string(); + let mut program = Program::new("sandbox-guest"); + program.set_memory_config(*memory_config); + program.build(&target_dir); + program + .get_elf_contents() + .ok_or_else(|| CheckError::InvalidInput("guest ELF not found after build".into())) } #[cfg(test)] diff --git a/jolt-eval/src/invariant/split_eq_bind.rs b/jolt-eval/src/invariant/split_eq_bind.rs index 830b37b3f..f9a65516e 100644 --- a/jolt-eval/src/invariant/split_eq_bind.rs +++ b/jolt-eval/src/invariant/split_eq_bind.rs @@ -9,7 +9,7 @@ use jolt_core::poly::eq_poly::EqPolynomial; use jolt_core::poly::multilinear_polynomial::BindingOrder; use jolt_core::poly::split_eq_poly::GruenSplitEqPolynomial; -use super::{Invariant, InvariantViolation}; +use super::{CheckError, Invariant, InvariantViolation}; type Challenge = ::Challenge; @@ -63,9 +63,9 @@ impl Invariant for SplitEqBindLowHighInvariant { fn setup(&self) {} - fn check(&self, _setup: &(), input: SplitEqBindInput) -> Result<(), InvariantViolation> { + fn check(&self, _setup: &(), input: SplitEqBindInput) -> Result<(), CheckError> { if input.w.len() < 2 { - return Ok(()); + return Err(CheckError::InvalidInput("w.len() < 2".into())); } let w = to_challenges(&input.w); let rs = to_challenges(&input.rs); @@ -76,10 +76,10 @@ impl Invariant for SplitEqBindLowHighInvariant { let merged = split_eq.merge(); if regular_eq.Z[..regular_eq.len()] != merged.Z[..merged.len()] { - return Err(InvariantViolation::with_details( + return Err(CheckError::Violation(InvariantViolation::with_details( "Initial merge mismatch (LowToHigh)", format!("num_vars={num_vars}"), - )); + ))); } for (round, r) in rs.iter().enumerate() { @@ -88,10 +88,10 @@ impl Invariant for SplitEqBindLowHighInvariant { let merged = split_eq.merge(); if regular_eq.Z[..regular_eq.len()] != merged.Z[..merged.len()] { - return 
Err(InvariantViolation::with_details( + return Err(CheckError::Violation(InvariantViolation::with_details( "Bind mismatch (LowToHigh)", format!("num_vars={num_vars}, round={round}"), - )); + ))); } } @@ -138,9 +138,9 @@ impl Invariant for SplitEqBindHighLowInvariant { fn setup(&self) {} - fn check(&self, _setup: &(), input: SplitEqBindInput) -> Result<(), InvariantViolation> { + fn check(&self, _setup: &(), input: SplitEqBindInput) -> Result<(), CheckError> { if input.w.len() < 2 { - return Ok(()); + return Err(CheckError::InvalidInput("w.len() < 2".into())); } let w = to_challenges(&input.w); let rs = to_challenges(&input.rs); @@ -151,10 +151,10 @@ impl Invariant for SplitEqBindHighLowInvariant { let merged = split_eq.merge(); if regular_eq.Z[..regular_eq.len()] != merged.Z[..merged.len()] { - return Err(InvariantViolation::with_details( + return Err(CheckError::Violation(InvariantViolation::with_details( "Initial merge mismatch (HighToLow)", format!("num_vars={num_vars}"), - )); + ))); } for (round, r) in rs.iter().enumerate() { @@ -163,10 +163,10 @@ impl Invariant for SplitEqBindHighLowInvariant { let merged = split_eq.merge(); if regular_eq.Z[..regular_eq.len()] != merged.Z[..merged.len()] { - return Err(InvariantViolation::with_details( + return Err(CheckError::Violation(InvariantViolation::with_details( "Bind mismatch (HighToLow)", format!("num_vars={num_vars}, round={round}"), - )); + ))); } } diff --git a/jolt-eval/src/invariant/synthesis/fuzz.rs b/jolt-eval/src/invariant/synthesis/fuzz.rs index 350ceb437..2335ccae4 100644 --- a/jolt-eval/src/invariant/synthesis/fuzz.rs +++ b/jolt-eval/src/invariant/synthesis/fuzz.rs @@ -43,8 +43,12 @@ macro_rules! 
fuzz_invariant { .expect("wrong setup type"); let mut u = $crate::arbitrary::Unstructured::new(data); if let Ok(input) = ::arbitrary(&mut u) { - inv.check(setup, input) - .unwrap_or_else(|e| panic!("Invariant violated: {e}")); + match inv.check(setup, input) { + Ok(()) | Err($crate::CheckError::InvalidInput(_)) => {} + Err($crate::CheckError::Violation(v)) => { + panic!("Invariant violated: {v}"); + } + } } } diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index 1f9027ad4..39364e9a0 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -1,6 +1,6 @@ use std::path::Path; -use super::super::{FailedAttempt, Invariant, InvariantViolation}; +use super::super::{CheckError, FailedAttempt, Invariant}; use crate::agent::AgentHarness; /// Result of a red-team session. @@ -94,7 +94,22 @@ pub fn auto_redteam( }, }; - match check_counterexample(invariant, &setup, &counterexample_json) { + let input: I::Input = match serde_json::from_str(&counterexample_json) { + Ok(v) => v, + Err(e) => { + tracing::info!("Agent produced unparseable input: {e}"); + failed_attempts.push(FailedAttempt { + description: format!("Iteration {}", iteration + 1), + approach: analysis, + failure_reason: format!( + "Could not deserialize response JSON into Input type: {e}" + ), + }); + continue; + } + }; + + match invariant.check(&setup, input) { Ok(()) => { failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), @@ -112,14 +127,11 @@ pub fn auto_redteam( error: violation.to_string(), }; } - Err(CheckError::BadInput(parse_err)) => { - tracing::info!("Agent produced unparseable input: {parse_err}"); + Err(CheckError::InvalidInput(reason)) => { failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), approach: analysis, - failure_reason: format!( - "Could not deserialize response JSON into Input type: {parse_err}" - ), + 
failure_reason: format!("Invalid input: {reason}"), }); } } @@ -130,21 +142,6 @@ pub fn auto_redteam( } } -enum CheckError { - Violation(InvariantViolation), - BadInput(String), -} - -fn check_counterexample( - inv: &I, - setup: &I::Setup, - json: &str, -) -> Result<(), CheckError> { - let input: I::Input = - serde_json::from_str(json).map_err(|e| CheckError::BadInput(e.to_string()))?; - inv.check(setup, input).map_err(CheckError::Violation) -} - fn build_envelope_schema(input_schema: &serde_json::Value) -> serde_json::Value { serde_json::json!({ "type": "object", diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index 785b95bfd..c2905555f 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -8,165 +8,25 @@ pub mod guests; pub mod invariant; pub mod objective; -use std::collections::HashMap; -use std::sync::Arc; - -use ark_bn254::Fr; -use ark_serialize::{CanonicalDeserialize, CanonicalSerialize}; -use jolt_core::curve::Bn254Curve; -use jolt_core::poly::commitment::dory::DoryCommitmentScheme; -use jolt_core::transcripts::Blake2bTranscript; - +pub use guests::{ + deserialize_proof, serialize_proof, CommitmentScheme, GuestProgram, JoltDevice, Proof, + ProofVerifyError, ProverPreprocessing, Serializable, SharedSetup, TestCase, + VerifierPreprocessing, F, FS, PCS, +}; pub use invariant::{ - Invariant, InvariantTargets, InvariantViolation, JoltInvariants, SynthesisTarget, + CheckError, Invariant, InvariantTargets, InvariantViolation, JoltInvariants, SynthesisTarget, }; pub use objective::{AbstractObjective, Direction, MeasurementError, Objective}; // Re-exports used by the #[invariant] proc macro generated code. -// Users of the macro don't need to add these to their own Cargo.toml. 
pub use arbitrary; pub use rand; -pub type F = Fr; -pub type C = Bn254Curve; -pub type PCS = DoryCommitmentScheme; -pub type FS = Blake2bTranscript; - -pub type Proof = jolt_core::zkvm::proof_serialization::JoltProof; -pub type ProverPreprocessing = jolt_core::zkvm::prover::JoltProverPreprocessing; -pub type VerifierPreprocessing = jolt_core::zkvm::verifier::JoltVerifierPreprocessing; -pub type SharedPreprocessing = jolt_core::zkvm::verifier::JoltSharedPreprocessing; - -pub use jolt_core::guest::program::Program as GuestProgram; -pub use jolt_core::poly::commitment::commitment_scheme::CommitmentScheme; -pub use jolt_core::utils::errors::ProofVerifyError; -pub use jolt_core::zkvm::Serializable; -pub use tracer::JoltDevice; - -/// A self-contained test case wrapping a compiled guest program. -/// -/// `TestCase` stores the raw ELF bytes and memory configuration so it can -/// reconstruct a `GuestProgram` on demand without requiring `Clone` on the -/// program itself. -pub struct TestCase { - pub elf_contents: Vec, - pub memory_config: common::jolt_device::MemoryConfig, - pub max_trace_length: usize, -} - -impl TestCase { - pub fn new(program: GuestProgram, max_trace_length: usize) -> Self { - Self { - elf_contents: program.elf_contents, - memory_config: program.memory_config, - max_trace_length, - } - } - - pub fn make_program(&self) -> GuestProgram { - GuestProgram::new(&self.elf_contents, &self.memory_config) - } - - /// Create prover preprocessing for this test case. - pub fn prover_preprocessing(&self) -> ProverPreprocessing { - let program = self.make_program(); - jolt_core::guest::prover::preprocess(&program, self.max_trace_length) - .expect("prover preprocessing failed") - } - - /// Create verifier preprocessing from prover preprocessing. - pub fn verifier_preprocessing(prover_pp: &ProverPreprocessing) -> VerifierPreprocessing { - VerifierPreprocessing::from(prover_pp) - } - - /// Prove execution of this program with the given inputs. 
- /// Returns (proof, io_device). - pub fn prove(&self, prover_pp: &ProverPreprocessing, inputs: &[u8]) -> (Proof, JoltDevice) { - let program = self.make_program(); - let mut output_bytes = vec![0u8; self.memory_config.max_output_size as usize]; - let (proof, io_device, _debug) = jolt_core::guest::prover::prove::( - &program, - inputs, - &[], - &[], - None, - None, - &mut output_bytes, - prover_pp, - ); - (proof, io_device) - } - - /// Verify a proof against the given preprocessing and I/O. - pub fn verify( - verifier_pp: &VerifierPreprocessing, - proof: Proof, - io_device: &JoltDevice, - ) -> Result<(), ProofVerifyError> { - jolt_core::guest::verifier::verify::( - &io_device.inputs, - None, - &io_device.outputs, - proof, - verifier_pp, - ) - } - - /// Verify a proof against claimed (potentially malicious) outputs and panic flag. - /// - /// Unlike [`verify`], this lets the caller override the output bytes and - /// panic flag independently, for testing that the verifier rejects - /// dishonest claims. 
- pub fn verify_with_claims( - verifier_pp: &VerifierPreprocessing, - proof: Proof, - inputs: &[u8], - claimed_outputs: &[u8], - claimed_panic: bool, - ) -> Result<(), ProofVerifyError> { - use common::jolt_device::MemoryConfig; - use jolt_core::zkvm::verifier::JoltVerifier; - - let memory_layout = &verifier_pp.shared.memory_layout; - let memory_config = MemoryConfig { - max_untrusted_advice_size: memory_layout.max_untrusted_advice_size, - max_trusted_advice_size: memory_layout.max_trusted_advice_size, - max_input_size: memory_layout.max_input_size, - max_output_size: memory_layout.max_output_size, - stack_size: memory_layout.stack_size, - heap_size: memory_layout.heap_size, - program_size: Some(memory_layout.program_size), - }; - let mut io_device = JoltDevice::new(&memory_config); - io_device.inputs = inputs.to_vec(); - io_device.outputs = claimed_outputs.to_vec(); - io_device.panic = claimed_panic; - - let verifier = - JoltVerifier::::new(verifier_pp, proof, io_device, None, None)?; - verifier.verify() - } -} - -/// Serialize a proof to bytes. -pub fn serialize_proof(proof: &Proof) -> Vec { - let mut buf = Vec::new(); - proof - .serialize_compressed(&mut buf) - .expect("proof serialization failed"); - buf -} - -/// Deserialize a proof from bytes. -pub fn deserialize_proof(bytes: &[u8]) -> Result { - Proof::deserialize_compressed(bytes) -} - /// Run all provided invariants, returning results keyed by name. pub fn check_all_invariants( invariants: &[JoltInvariants], num_random: usize, -) -> HashMap>> { +) -> std::collections::HashMap>> { invariants .iter() .map(|inv| { @@ -180,7 +40,7 @@ pub fn check_all_invariants( /// Measure all provided objectives, returning results keyed by name. 
pub fn measure_all_objectives( objectives: &[Objective], -) -> HashMap> { +) -> std::collections::HashMap> { objectives .iter() .map(|obj| { @@ -190,27 +50,3 @@ pub fn measure_all_objectives( }) .collect() } - -/// Shared setup that can be reused across multiple invariants/objectives -/// operating on the same program. -pub struct SharedSetup { - pub test_case: Arc, - pub prover_preprocessing: Arc, - pub verifier_preprocessing: Arc, -} - -impl SharedSetup { - pub fn new(test_case: TestCase) -> Self { - Self::new_from_arc(Arc::new(test_case)) - } - - pub fn new_from_arc(test_case: Arc) -> Self { - let prover_pp = test_case.prover_preprocessing(); - let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); - Self { - test_case, - prover_preprocessing: Arc::new(prover_pp), - verifier_preprocessing: Arc::new(verifier_pp), - } - } -} diff --git a/jolt-eval/tests/agent_test.rs b/jolt-eval/tests/agent_test.rs index f7fa0b5fc..e388a54c5 100644 --- a/jolt-eval/tests/agent_test.rs +++ b/jolt-eval/tests/agent_test.rs @@ -4,7 +4,9 @@ use std::path::Path; use enumset::EnumSet; use jolt_eval::agent::{AgentError, AgentHarness, AgentResponse, MockAgent}; use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; -use jolt_eval::invariant::{Invariant, InvariantTargets, InvariantViolation, SynthesisTarget}; +use jolt_eval::invariant::{ + CheckError, Invariant, InvariantTargets, InvariantViolation, SynthesisTarget, +}; use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; use jolt_eval::objective::Direction; @@ -29,7 +31,7 @@ impl Invariant for AlwaysPassInvariant { "This invariant always passes.".into() } fn setup(&self) {} - fn check(&self, _: &(), _: u8) -> Result<(), InvariantViolation> { + fn check(&self, _: &(), _: u8) -> Result<(), CheckError> { Ok(()) } fn seed_corpus(&self) -> Vec { @@ -54,8 +56,10 @@ impl Invariant for AlwaysFailInvariant { "This invariant always fails.".into() } fn setup(&self) {} - fn 
check(&self, _: &(), input: u8) -> Result<(), InvariantViolation> { - Err(InvariantViolation::new(format!("always fails ({input})"))) + fn check(&self, _: &(), input: u8) -> Result<(), CheckError> { + Err(CheckError::Violation(InvariantViolation::new(format!( + "always fails ({input})" + )))) } fn seed_corpus(&self) -> Vec { vec![42] @@ -79,9 +83,11 @@ impl Invariant for FailsOnZeroInvariant { "Fails when input is 0.".into() } fn setup(&self) {} - fn check(&self, _: &(), input: u8) -> Result<(), InvariantViolation> { + fn check(&self, _: &(), input: u8) -> Result<(), CheckError> { if input == 0 { - Err(InvariantViolation::new("input was zero")) + Err(CheckError::Violation(InvariantViolation::new( + "input was zero", + ))) } else { Ok(()) } diff --git a/jolt-eval/tests/integration.rs b/jolt-eval/tests/integration.rs index 739b69d20..23517e49f 100644 --- a/jolt-eval/tests/integration.rs +++ b/jolt-eval/tests/integration.rs @@ -1,4 +1,4 @@ -use jolt_eval::invariant::{Invariant, InvariantTargets, InvariantViolation}; +use jolt_eval::invariant::{CheckError, Invariant, InvariantTargets, InvariantViolation}; use jolt_eval::objective::{AbstractObjective, Direction, MeasurementError}; /// A trivial invariant for testing the framework itself. 
@@ -19,7 +19,7 @@ impl Invariant for TrivialInvariant { fn setup(&self) -> Self::Setup {} - fn check(&self, _setup: &Self::Setup, _input: u8) -> Result<(), InvariantViolation> { + fn check(&self, _setup: &Self::Setup, _input: u8) -> Result<(), CheckError> { Ok(()) } @@ -46,8 +46,10 @@ impl Invariant for FailingInvariant { fn setup(&self) -> Self::Setup {} - fn check(&self, _setup: &Self::Setup, input: u8) -> Result<(), InvariantViolation> { - Err(InvariantViolation::new(format!("failed for input {input}"))) + fn check(&self, _setup: &Self::Setup, input: u8) -> Result<(), CheckError> { + Err(CheckError::Violation(InvariantViolation::new(format!( + "failed for input {input}" + )))) } fn seed_corpus(&self) -> Vec { diff --git a/jolt-eval/tests/macro_test.rs b/jolt-eval/tests/macro_test.rs index 557be0952..e935a78cb 100644 --- a/jolt-eval/tests/macro_test.rs +++ b/jolt-eval/tests/macro_test.rs @@ -1,4 +1,4 @@ -use jolt_eval::invariant::{Invariant, InvariantViolation}; +use jolt_eval::invariant::{CheckError, Invariant, InvariantViolation}; // --------------------------------------------------------------------------- // AlwaysPass: trivial invariant to test macro synthesis @@ -19,7 +19,7 @@ impl Invariant for AlwaysPassInvariant { "Trivial invariant that always passes — used to test macro synthesis.".to_string() } fn setup(&self) -> Self::Setup {} - fn check(&self, _: &(), _input: u8) -> Result<(), InvariantViolation> { + fn check(&self, _: &(), _input: u8) -> Result<(), CheckError> { Ok(()) } fn seed_corpus(&self) -> Vec { @@ -59,13 +59,15 @@ impl Invariant for BoundsCheckInvariant { "Checks that max(lo,hi) >= min(lo,hi).".to_string() } fn setup(&self) -> Self::Setup {} - fn check(&self, _: &(), input: RangeInput) -> Result<(), InvariantViolation> { + fn check(&self, _: &(), input: RangeInput) -> Result<(), CheckError> { let lo = input.lo.min(input.hi); let hi = input.lo.max(input.hi); if hi >= lo { Ok(()) } else { - Err(InvariantViolation::new("max < min — 
impossible")) + Err(CheckError::Violation(InvariantViolation::new( + "max < min — impossible", + ))) } } fn seed_corpus(&self) -> Vec { From 010e8cb93033088e08bcd71749f2fd1a7ebc970e Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 19:07:01 -0400 Subject: [PATCH 30/86] feat(jolt-eval): add soundness invariant tests, fix guest compilation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add guest-sandbox to workspace members so jolt build can find it. Fix guest to return u32 (Vec lacks Serialize in no_std). Wrap Program::build in catch_unwind to handle compilation panics. Add tests for memory config validation, patch filtering, and the full compile→prove→verify path. Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.toml | 1 + jolt-eval/guest-sandbox/src/lib.rs | 10 +- jolt-eval/src/invariant/soundness.rs | 162 ++++++++++++++++++++++++--- jolt-eval/tests/agent_test.rs | 1 + 4 files changed, 158 insertions(+), 16 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 61cd677e5..008640027 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -94,6 +94,7 @@ members = [ "z3-verifier", "jolt-eval", "jolt-eval/macros", + "jolt-eval/guest-sandbox", ] exclude = ["jolt-eval/fuzz"] diff --git a/jolt-eval/guest-sandbox/src/lib.rs b/jolt-eval/guest-sandbox/src/lib.rs index c3b8403c0..2f0f1b124 100644 --- a/jolt-eval/guest-sandbox/src/lib.rs +++ b/jolt-eval/guest-sandbox/src/lib.rs @@ -1,8 +1,12 @@ #![cfg_attr(feature = "guest", no_std)] #[jolt::provable(heap_size = 32768, stack_size = 65536, max_trace_length = 1048576)] -fn sandbox(input: &[u8]) -> Vec { - // Identity function — the red-team agent patches this to explore +fn sandbox(input: &[u8]) -> u32 { + // Simple hash — the red-team agent patches this to explore // code paths that might break soundness. 
- input.to_vec() + let mut h: u32 = 0; + for &b in input { + h = h.wrapping_mul(31).wrapping_add(b as u32); + } + h } diff --git a/jolt-eval/src/invariant/soundness.rs b/jolt-eval/src/invariant/soundness.rs index 72890c133..d0da6c5ec 100644 --- a/jolt-eval/src/invariant/soundness.rs +++ b/jolt-eval/src/invariant/soundness.rs @@ -44,7 +44,7 @@ const MAX_HEAP_SIZE: u64 = 1 << 20; const MAX_TRACE_LENGTH: usize = 1 << 20; // ~1M steps impl GuestMemoryConfig { - fn validate(&self) -> Result<(), CheckError> { + pub fn validate(&self) -> Result<(), CheckError> { if self.max_input_size > MAX_INPUT_SIZE || self.max_output_size > MAX_OUTPUT_SIZE || self.stack_size > MAX_STACK_SIZE @@ -205,7 +205,7 @@ impl Invariant for SoundnessInvariant { vec![SoundnessInput { patch: String::new(), memory: GuestMemoryConfig::default(), - program_input: vec![1, 2, 3], + program_input: postcard::to_stdvec::<[u8]>(&[1, 2, 3]).unwrap(), claimed_output: vec![0xFF], claimed_panic: false, }] @@ -296,25 +296,50 @@ pub fn filter_patch(patch: &str) -> String { } /// Compile the sandbox guest and return the ELF bytes. +/// +/// `Program::build` panics on compilation failure, so we catch it. 
fn compile_guest( sandbox_dir: &Path, memory_config: &MemoryConfig, ) -> Result, CheckError> { let target_dir = sandbox_dir.join("target").to_string_lossy().to_string(); - let mut program = Program::new("sandbox-guest"); - program.set_memory_config(*memory_config); - program.build(&target_dir); - program - .get_elf_contents() - .ok_or_else(|| CheckError::InvalidInput("guest ELF not found after build".into())) + let mc = *memory_config; + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let mut program = Program::new("sandbox-guest"); + program.set_memory_config(mc); + program.build(&target_dir); + program.get_elf_contents() + })); + match result { + Ok(Some(elf)) => Ok(elf), + Ok(None) => Err(CheckError::InvalidInput( + "guest ELF not found after build".into(), + )), + Err(_) => Err(CheckError::InvalidInput( + "guest compilation panicked".into(), + )), + } } #[cfg(test)] mod tests { - use super::filter_patch; + use super::*; + use crate::Invariant; + + fn default_input() -> SoundnessInput { + SoundnessInput { + patch: String::new(), + memory: GuestMemoryConfig::default(), + program_input: postcard::to_stdvec::<[u8]>(&[1, 2, 3]).unwrap(), + claimed_output: vec![0xFF], + claimed_panic: false, + } + } + + // ── filter_patch ──────────────────────────────────────────────── #[test] - fn keeps_safe_hunks() { + fn filter_keeps_safe_hunks() { let patch = "\ diff --git a/src/lib.rs b/src/lib.rs --- a/src/lib.rs @@ -328,7 +353,7 @@ diff --git a/src/lib.rs b/src/lib.rs } #[test] - fn drops_hunks_with_path_traversal() { + fn filter_drops_path_traversal() { let patch = "\ diff --git a/../../jolt-core/src/lib.rs b/../../jolt-core/src/lib.rs --- a/../../jolt-core/src/lib.rs @@ -342,7 +367,7 @@ diff --git a/../../jolt-core/src/lib.rs b/../../jolt-core/src/lib.rs } #[test] - fn mixed_safe_and_unsafe_hunks() { + fn filter_mixed_safe_and_unsafe() { let patch = "\ diff --git a/src/lib.rs b/src/lib.rs --- a/src/lib.rs @@ -370,8 +395,119 @@ diff --git 
a/Cargo.toml b/Cargo.toml } #[test] - fn empty_patch_stays_empty() { + fn filter_empty_patch() { assert!(filter_patch("").is_empty()); assert!(filter_patch(" \n ").trim().is_empty()); } + + // ── memory config validation ──────────────────────────────────── + + #[test] + fn validate_accepts_defaults() { + assert!(GuestMemoryConfig::default().validate().is_ok()); + } + + #[test] + fn validate_rejects_oversized_input() { + let c = GuestMemoryConfig { max_input_size: u64::MAX, ..Default::default() }; + assert!(matches!(c.validate(), Err(CheckError::InvalidInput(_)))); + } + + #[test] + fn validate_rejects_oversized_output() { + let c = GuestMemoryConfig { max_output_size: u64::MAX, ..Default::default() }; + assert!(matches!(c.validate(), Err(CheckError::InvalidInput(_)))); + } + + #[test] + fn validate_rejects_oversized_stack() { + let c = GuestMemoryConfig { stack_size: u64::MAX, ..Default::default() }; + assert!(matches!(c.validate(), Err(CheckError::InvalidInput(_)))); + } + + #[test] + fn validate_rejects_oversized_heap() { + let c = GuestMemoryConfig { heap_size: u64::MAX, ..Default::default() }; + assert!(matches!(c.validate(), Err(CheckError::InvalidInput(_)))); + } + + #[test] + fn validate_rejects_oversized_trace() { + let c = GuestMemoryConfig { max_trace_length: usize::MAX, ..Default::default() }; + assert!(matches!(c.validate(), Err(CheckError::InvalidInput(_)))); + } + + #[test] + fn check_rejects_oversized_memory_before_compilation() { + let inv = SoundnessInvariant; + let setup = inv.setup(); + let input = SoundnessInput { + memory: GuestMemoryConfig { heap_size: u64::MAX, ..Default::default() }, + ..default_input() + }; + assert!(matches!(inv.check(&setup, input), Err(CheckError::InvalidInput(_)))); + } + + // ── patching ──────────────────────────────────────────────────── + + #[test] + fn check_garbage_patch_is_noop() { + let inv = SoundnessInvariant; + let setup = inv.setup(); + let input = SoundnessInput { + patch: "this is not a valid unified 
diff\n+garbage".into(), + ..default_input() + }; + // Garbage with no diff headers passes filter_patch unchanged. + // git apply --allow-empty treats it as a no-op (no hunks), + // so the unpatched sandbox compiles and the check proceeds normally. + assert!(inv.check(&setup, input).is_ok()); + } + + // ── compilation + prove/verify (slow) ─────────────────────────── + + #[test] + fn check_path_traversal_filtered_then_compiles() { + let inv = SoundnessInvariant; + let setup = inv.setup(); + let input = SoundnessInput { + patch: "\ +diff --git a/../../etc/passwd b/../../etc/passwd +--- a/../../etc/passwd ++++ b/../../etc/passwd +@@ -1 +1 @@ +-root ++hacked +" + .into(), + ..default_input() + }; + // Traversal hunks are filtered out → empty patch → compiles + // unpatched sandbox → proves → verifier rejects dishonest claim. + assert!(inv.check(&setup, input).is_ok()); + } + + #[test] + fn check_unpatched_sandbox_rejects_dishonest_output() { + let inv = SoundnessInvariant; + let setup = inv.setup(); + // claimed_output=[0xFF] doesn't match the identity function's + // honest output for input [1,2,3]. Verifier should reject. + assert!(inv.check(&setup, default_input()).is_ok()); + } + + #[test] + fn check_noop_claim_returns_invalid_input() { + let inv = SoundnessInvariant; + let setup = inv.setup(); + // The sandbox computes h = wrapping hash of input bytes. 
+ // For input [1,2,3]: h = ((0*31+1)*31+2)*31+3 = 1026 + let honest_output = postcard::to_stdvec(&1026u32).unwrap(); + let input = SoundnessInput { + claimed_output: honest_output, + claimed_panic: false, + ..default_input() + }; + assert!(matches!(inv.check(&setup, input), Err(CheckError::InvalidInput(_)))); + } } diff --git a/jolt-eval/tests/agent_test.rs b/jolt-eval/tests/agent_test.rs index e388a54c5..955d57505 100644 --- a/jolt-eval/tests/agent_test.rs +++ b/jolt-eval/tests/agent_test.rs @@ -1053,3 +1053,4 @@ fn optimize_invariant_failure_mid_sequence() { assert_eq!(env.rejected, 1); // iter 2 rejected (invariant failure) assert_eq!(result.best["time"], 7.0); } + From e19244c6b492766601d34e5e10c94522fe7b53f6 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 19:29:46 -0400 Subject: [PATCH 31/86] refactor(jolt-eval): replace guest-based objectives with tokei-based code metrics Remove all 7 guest-dependent objectives (peak_rss, prover_time, proof_size, verifier_time, guest_cycles, inline_lengths, wrapping_cost) and the measure-objectives/optimize binaries. Add 2 codebase quality objectives using tokei CLI: - lloc: total lines of Rust code in jolt-core/src/ - comment_density: comments/code ratio in jolt-core/src/ Clean up guests/ module: remove catalog.rs, SharedSetup, serialize/deserialize_proof, and other cruft only used by the old objectives. guests/ now only contains TestCase and type aliases. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 11 +- jolt-eval/Cargo.toml | 12 -- jolt-eval/bin/measure_objectives.rs | 91 --------- jolt-eval/bin/optimize.rs | 203 -------------------- jolt-eval/guest-sandbox/.gitignore | 1 + jolt-eval/src/guests/catalog.rs | 170 ----------------- jolt-eval/src/guests/mod.rs | 54 ------ jolt-eval/src/lib.rs | 35 +--- jolt-eval/src/objective/comment_density.rs | 39 ++++ jolt-eval/src/objective/guest_cycles.rs | 36 ---- jolt-eval/src/objective/inline_lengths.rs | 47 ----- jolt-eval/src/objective/lloc.rs | 68 +++++++ jolt-eval/src/objective/mod.rs | 209 ++------------------- jolt-eval/src/objective/peak_rss.rs | 67 ------- jolt-eval/src/objective/proof_size.rs | 47 ----- jolt-eval/src/objective/prover_time.rs | 52 ----- jolt-eval/src/objective/verifier_time.rs | 58 ------ jolt-eval/src/objective/wrapping_cost.rs | 43 ----- jolt-eval/tests/integration.rs | 38 ++-- 19 files changed, 153 insertions(+), 1128 deletions(-) delete mode 100644 jolt-eval/bin/measure_objectives.rs delete mode 100644 jolt-eval/bin/optimize.rs create mode 100644 jolt-eval/guest-sandbox/.gitignore delete mode 100644 jolt-eval/src/guests/catalog.rs create mode 100644 jolt-eval/src/objective/comment_density.rs delete mode 100644 jolt-eval/src/objective/guest_cycles.rs delete mode 100644 jolt-eval/src/objective/inline_lengths.rs create mode 100644 jolt-eval/src/objective/lloc.rs delete mode 100644 jolt-eval/src/objective/peak_rss.rs delete mode 100644 jolt-eval/src/objective/proof_size.rs delete mode 100644 jolt-eval/src/objective/prover_time.rs delete mode 100644 jolt-eval/src/objective/verifier_time.rs delete mode 100644 jolt-eval/src/objective/wrapping_cost.rs diff --git a/Cargo.lock b/Cargo.lock index 7bbdb1ff9..ccadbcbc7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2946,7 +2946,6 @@ version = "0.1.0" dependencies = [ "arbitrary", "ark-bn254", - "ark-serialize 0.5.0", "clap", "common", "enumset", @@ -2955,13 +2954,10 @@ dependencies = [ 
"jolt-eval-macros", "postcard", "rand 0.8.5", - "rayon", "schemars 0.8.22", "serde", "serde_json", - "sysinfo", "tempfile", - "thiserror 2.0.18", "tracer", "tracing", "tracing-subscriber", @@ -4991,6 +4987,13 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "sandbox-guest" +version = "0.1.0" +dependencies = [ + "jolt-sdk", +] + [[package]] name = "scc" version = "2.4.0" diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index e4ee0539a..4081be8dd 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -9,17 +9,13 @@ common = { workspace = true, features = ["std"] } tracer = { workspace = true } ark-bn254 = { workspace = true } -ark-serialize = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = ["std"] } postcard = { workspace = true, features = ["use-std"] } -thiserror = { workspace = true } eyre = { workspace = true } tracing = { workspace = true } clap = { workspace = true, features = ["derive"] } -rayon = { workspace = true } rand = { workspace = true } -sysinfo = { workspace = true } tracing-subscriber = { workspace = true } arbitrary = { version = "1", features = ["derive"] } @@ -29,14 +25,6 @@ tempfile = "3" jolt-eval-macros = { path = "macros" } -[[bin]] -name = "measure-objectives" -path = "bin/measure_objectives.rs" - [[bin]] name = "redteam" path = "bin/redteam.rs" - -[[bin]] -name = "optimize" -path = "bin/optimize.rs" diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs deleted file mode 100644 index 365322f54..000000000 --- a/jolt-eval/bin/measure_objectives.rs +++ /dev/null @@ -1,91 +0,0 @@ -use clap::Parser; - -use jolt_eval::guests; -use jolt_eval::objective::{build_objectives_from_inventory, registered_objectives}; -use jolt_eval::SharedSetup; - -#[derive(Parser)] -#[command(name = "measure-objectives")] -#[command(about = "Measure Jolt performance objectives")] -struct Cli { - /// Guest program to evaluate (e.g. 
muldiv, fibonacci, sha2) - #[arg(long)] - guest: Option, - - /// Path to a pre-compiled guest ELF (alternative to --guest) - #[arg(long)] - elf: Option, - - /// Only measure the named objective (default: all) - #[arg(long)] - objective: Option, - - /// Number of samples per objective - #[arg(long)] - samples: Option, - - /// Max trace length override - #[arg(long)] - max_trace_length: Option, -} - -fn main() -> eyre::Result<()> { - tracing_subscriber::fmt::init(); - let cli = Cli::parse(); - - let (test_case, default_inputs) = guests::resolve_test_case( - cli.guest.as_deref(), - cli.elf.as_deref(), - cli.max_trace_length, - ); - - let setup = SharedSetup::new_from_arc(test_case); - let objectives = build_objectives_from_inventory(Some(&setup), default_inputs); - - let filtered: Vec<_> = if let Some(name) = &cli.objective { - objectives - .into_iter() - .filter(|o| o.name() == name.as_str()) - .collect() - } else { - objectives - }; - - if filtered.is_empty() { - let all_names: Vec<_> = registered_objectives().map(|e| e.name).collect(); - eprintln!( - "No matching objectives. 
Available: {}", - all_names.join(", ") - ); - std::process::exit(1); - } - - println!("{:<25} {:>15} {:>10}", "Objective", "Value", "Direction"); - println!("{}", "-".repeat(52)); - - for obj in &filtered { - let samples = cli.samples.unwrap_or(1); - let mut measurements = Vec::new(); - - for _ in 0..samples { - match obj.collect_measurement() { - Ok(val) => measurements.push(val), - Err(e) => { - println!("{:<25} {:>15}", obj.name(), format!("ERROR: {e}")); - continue; - } - } - } - - if !measurements.is_empty() { - let mean = measurements.iter().sum::() / measurements.len() as f64; - let dir = match obj.direction() { - jolt_eval::Direction::Minimize => "min", - jolt_eval::Direction::Maximize => "max", - }; - println!("{:<25} {:>15.2} {:>10}", obj.name(), mean, dir); - } - } - - Ok(()) -} diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs deleted file mode 100644 index f22c9dd70..000000000 --- a/jolt-eval/bin/optimize.rs +++ /dev/null @@ -1,203 +0,0 @@ -use std::collections::HashMap; -use std::process::Command; - -use clap::Parser; - -use jolt_eval::agent::ClaudeCodeAgent; -use jolt_eval::guests; -use jolt_eval::invariant::JoltInvariants; -use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; -use jolt_eval::objective::{ - build_objectives_from_inventory, measure_dyn, AbstractObjective, Direction, -}; -use jolt_eval::SharedSetup; - -#[derive(Parser)] -#[command(name = "optimize")] -#[command(about = "AI-driven optimization of Jolt objectives")] -struct Cli { - /// Guest program to evaluate (e.g. muldiv, fibonacci, sha2) - #[arg(long)] - guest: Option, - - /// Path to a pre-compiled guest ELF (alternative to --guest) - #[arg(long)] - elf: Option, - - /// Objectives to optimize (comma-separated). Default: all. 
- #[arg(long)] - objectives: Option, - - /// Number of optimization iterations - #[arg(long, default_value = "5")] - iterations: usize, - - /// AI model to use - #[arg(long, default_value = "claude-sonnet-4-20250514")] - model: String, - - /// Maximum number of Claude agentic turns per iteration - #[arg(long, default_value = "30")] - max_turns: usize, - - /// Extra context to include in the optimization prompt - #[arg(long)] - hint: Option, - - /// Max trace length override - #[arg(long)] - max_trace_length: Option, -} - -struct RealEnv { - objectives: Vec>, - invariants: Vec, - repo_dir: std::path::PathBuf, -} - -impl OptimizeEnv for RealEnv { - fn measure(&mut self) -> HashMap { - measure_dyn(&self.objectives) - } - - fn check_invariants(&mut self) -> bool { - self.invariants.iter().all(|inv| { - let results = inv.run_checks(0); - results.iter().all(|r| r.is_ok()) - }) - } - - fn directions(&self) -> HashMap { - self.objectives - .iter() - .map(|o| (o.name().to_string(), o.direction())) - .collect() - } - - fn apply_diff(&mut self, diff: &str) { - if let Err(e) = jolt_eval::agent::apply_diff(&self.repo_dir, diff) { - tracing::warn!("Failed to apply diff: {e}"); - } - } - - fn accept(&mut self, iteration: usize) { - println!(" Improvement found -- keeping changes."); - let _ = Command::new("git") - .current_dir(&self.repo_dir) - .args(["add", "-A"]) - .status(); - let msg = format!("perf(auto-optimize): iteration {iteration}"); - let _ = Command::new("git") - .current_dir(&self.repo_dir) - .args(["commit", "-m", &msg, "--allow-empty"]) - .status(); - } - - fn reject(&mut self) { - println!(" Reverting changes."); - let _ = Command::new("git") - .current_dir(&self.repo_dir) - .args(["checkout", "."]) - .status(); - } -} - -fn main() -> eyre::Result<()> { - tracing_subscriber::fmt::init(); - let cli = Cli::parse(); - - let (test_case, default_inputs) = guests::resolve_test_case( - cli.guest.as_deref(), - cli.elf.as_deref(), - cli.max_trace_length, - ); - - let setup 
= SharedSetup::new_from_arc(test_case.clone()); - let all_objectives = build_objectives_from_inventory(Some(&setup), default_inputs.clone()); - let all_names: Vec = all_objectives - .iter() - .map(|o| o.name().to_string()) - .collect(); - - let filter_names: Option> = cli - .objectives - .as_ref() - .map(|s| s.split(',').map(|n| n.trim().to_string()).collect()); - - let objectives: Vec> = if let Some(names) = &filter_names { - all_objectives - .into_iter() - .filter(|o| names.contains(&o.name().to_string())) - .collect() - } else { - all_objectives - }; - - if objectives.is_empty() { - eprintln!( - "No matching objectives. Available: {}", - all_names.join(", ") - ); - std::process::exit(1); - } - - let invariants = JoltInvariants::all(); - let repo_dir = std::env::current_dir()?; - - let mut env = RealEnv { - objectives, - invariants, - repo_dir: repo_dir.clone(), - }; - - println!("=== Baseline measurements ==="); - let baseline = env.measure(); - print_measurements(&env.directions(), &baseline); - println!(); - - let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); - let config = OptimizeConfig { - num_iterations: cli.iterations, - hint: cli.hint.clone(), - }; - - let result = auto_optimize(&agent, &mut env, &config, &repo_dir); - - println!("=== Optimization summary ==="); - println!( - "{}/{} iterations produced improvements.", - result - .attempts - .iter() - .filter(|a| a.invariants_passed - && a.measurements - .iter() - .any(|(name, &val)| { result.baseline.get(name).is_some_and(|&b| val < b) })) - .count(), - result.attempts.len() - ); - println!(); - println!("Final measurements:"); - print_measurements(&env.directions(), &result.best); - - Ok(()) -} - -fn print_measurements( - directions: &HashMap, - measurements: &HashMap, -) { - let mut names: Vec<_> = directions.keys().collect(); - names.sort(); - for name in names { - let val = measurements - .get(name) - .map(|v| format!("{v:.4}")) - .unwrap_or_else(|| "N/A".to_string()); - let dir = match 
directions[name] { - Direction::Minimize => "min", - Direction::Maximize => "max", - }; - println!(" {:<25} {:>15} {:>6}", name, val, dir); - } -} diff --git a/jolt-eval/guest-sandbox/.gitignore b/jolt-eval/guest-sandbox/.gitignore new file mode 100644 index 000000000..b83d22266 --- /dev/null +++ b/jolt-eval/guest-sandbox/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/jolt-eval/src/guests/catalog.rs b/jolt-eval/src/guests/catalog.rs deleted file mode 100644 index 18901669d..000000000 --- a/jolt-eval/src/guests/catalog.rs +++ /dev/null @@ -1,170 +0,0 @@ -use std::sync::Arc; - -use common::constants::{DEFAULT_MAX_TRUSTED_ADVICE_SIZE, DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE}; -use common::jolt_device::MemoryConfig; -use jolt_core::host::Program; - -use super::TestCase; - -/// A known guest program that jolt-eval can compile and run. -pub struct GuestSpec { - /// Cargo package name of the guest crate (e.g. "muldiv-guest"). - pub package: &'static str, - /// Short name used in CLI `--guest` flags. - pub name: &'static str, - pub heap_size: u64, - pub stack_size: u64, - pub max_input_size: u64, - pub max_output_size: u64, - pub max_trace_length: usize, - /// Default inputs to serialize and pass to the guest program. 
- pub default_inputs: fn() -> Vec, -} - -impl GuestSpec { - pub fn memory_config(&self) -> MemoryConfig { - MemoryConfig { - max_input_size: self.max_input_size, - max_output_size: self.max_output_size, - max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, - max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE, - stack_size: self.stack_size, - heap_size: self.heap_size, - program_size: None, - } - } - - pub fn compile(&self, target_dir: &str) -> TestCase { - let mut program = Program::new(self.package); - program.set_memory_config(self.memory_config()); - program.build(target_dir); - let elf_bytes = program - .get_elf_contents() - .expect("guest ELF not found after build"); - TestCase { - elf_contents: elf_bytes, - memory_config: self.memory_config(), - max_trace_length: self.max_trace_length, - } - } -} - -pub static GUESTS: &[GuestSpec] = &[ - GuestSpec { - package: "muldiv-guest", - name: "muldiv", - heap_size: 32768, - stack_size: 4096, - max_input_size: 4096, - max_output_size: 4096, - max_trace_length: 65536, - default_inputs: || postcard::to_stdvec(&(12031293u32, 17u32, 92u32)).unwrap(), - }, - GuestSpec { - package: "fibonacci-guest", - name: "fibonacci", - heap_size: 32768, - stack_size: 4096, - max_input_size: 4096, - max_output_size: 4096, - max_trace_length: 65536, - default_inputs: || postcard::to_stdvec(&100u32).unwrap(), - }, - GuestSpec { - package: "sha2-guest", - name: "sha2", - heap_size: 32768, - stack_size: 4096, - max_input_size: 4096, - max_output_size: 4096, - max_trace_length: 65536, - default_inputs: || postcard::to_stdvec(&vec![5u8; 32]).unwrap(), - }, - GuestSpec { - package: "sha3-guest", - name: "sha3", - heap_size: 32768, - stack_size: 4096, - max_input_size: 4096, - max_output_size: 4096, - max_trace_length: 65536, - default_inputs: || postcard::to_stdvec(&vec![5u8; 32]).unwrap(), - }, - GuestSpec { - package: "collatz-guest", - name: "collatz", - heap_size: 32768, - stack_size: 4096, - max_input_size: 4096, - 
max_output_size: 4096, - max_trace_length: 1048576, - default_inputs: || postcard::to_stdvec(&19u32).unwrap(), - }, - GuestSpec { - package: "alloc-guest", - name: "alloc", - heap_size: 32768, - stack_size: 4096, - max_input_size: 4096, - max_output_size: 4096, - max_trace_length: 65536, - default_inputs: Vec::new, - }, -]; - -pub fn find_guest(name: &str) -> Option<&'static GuestSpec> { - GUESTS.iter().find(|g| g.name == name) -} - -pub fn guest_names() -> Vec<&'static str> { - GUESTS.iter().map(|g| g.name).collect() -} - -/// Resolve a `TestCase` from either `--guest ` or `--elf `. -pub fn resolve_test_case( - guest: Option<&str>, - elf: Option<&str>, - max_trace_length_override: Option, -) -> (Arc, Vec) { - if let Some(name) = guest { - let spec = find_guest(name).unwrap_or_else(|| { - eprintln!( - "Unknown guest '{name}'. Available: {}", - guest_names().join(", ") - ); - std::process::exit(1); - }); - let mut tc = spec.compile("/tmp/jolt-guest-targets"); - if let Some(mtl) = max_trace_length_override { - tc.max_trace_length = mtl; - } - let inputs = (spec.default_inputs)(); - (Arc::new(tc), inputs) - } else if let Some(path) = elf { - let elf_bytes = std::fs::read(path).unwrap_or_else(|e| { - eprintln!("Failed to read ELF {path}: {e}"); - std::process::exit(1); - }); - let tc = TestCase { - elf_contents: elf_bytes, - memory_config: MemoryConfig { - max_input_size: 4096, - max_output_size: 4096, - max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, - max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE, - stack_size: 65536, - heap_size: 32768, - program_size: None, - }, - max_trace_length: max_trace_length_override.unwrap_or(65536), - }; - (Arc::new(tc), vec![]) - } else { - eprintln!( - "Provide either --guest or --elf .\n\ - Available guests: {}", - guest_names().join(", ") - ); - std::process::exit(1); - } -} diff --git a/jolt-eval/src/guests/mod.rs b/jolt-eval/src/guests/mod.rs index 93dcb0548..82fb80255 100644 --- a/jolt-eval/src/guests/mod.rs 
+++ b/jolt-eval/src/guests/mod.rs @@ -1,18 +1,10 @@ -mod catalog; - -use std::sync::Arc; - use ark_bn254::Fr; -use ark_serialize::{CanonicalDeserialize, CanonicalSerialize}; use jolt_core::curve::Bn254Curve; use jolt_core::poly::commitment::dory::DoryCommitmentScheme; use jolt_core::transcripts::Blake2bTranscript; -pub use catalog::{find_guest, guest_names, resolve_test_case, GuestSpec, GUESTS}; pub use jolt_core::guest::program::Program as GuestProgram; -pub use jolt_core::poly::commitment::commitment_scheme::CommitmentScheme; pub use jolt_core::utils::errors::ProofVerifyError; -pub use jolt_core::zkvm::Serializable; pub use tracer::JoltDevice; pub type F = Fr; @@ -35,14 +27,6 @@ pub struct TestCase { } impl TestCase { - pub fn new(program: GuestProgram, max_trace_length: usize) -> Self { - Self { - elf_contents: program.elf_contents, - memory_config: program.memory_config, - max_trace_length, - } - } - pub fn make_program(&self) -> GuestProgram { GuestProgram::new(&self.elf_contents, &self.memory_config) } @@ -118,41 +102,3 @@ impl TestCase { verifier.verify() } } - -/// Serialize a proof to bytes. -pub fn serialize_proof(proof: &Proof) -> Vec { - let mut buf = Vec::new(); - proof - .serialize_compressed(&mut buf) - .expect("proof serialization failed"); - buf -} - -/// Deserialize a proof from bytes. -pub fn deserialize_proof(bytes: &[u8]) -> Result { - Proof::deserialize_compressed(bytes) -} - -/// Shared setup reusable across multiple invariants/objectives -/// operating on the same program. 
-pub struct SharedSetup { - pub test_case: Arc, - pub prover_preprocessing: Arc, - pub verifier_preprocessing: Arc, -} - -impl SharedSetup { - pub fn new(test_case: TestCase) -> Self { - Self::new_from_arc(Arc::new(test_case)) - } - - pub fn new_from_arc(test_case: Arc) -> Self { - let prover_pp = test_case.prover_preprocessing(); - let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); - Self { - test_case, - prover_preprocessing: Arc::new(prover_pp), - verifier_preprocessing: Arc::new(verifier_pp), - } - } -} diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index c2905555f..f266f9486 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -8,11 +8,7 @@ pub mod guests; pub mod invariant; pub mod objective; -pub use guests::{ - deserialize_proof, serialize_proof, CommitmentScheme, GuestProgram, JoltDevice, Proof, - ProofVerifyError, ProverPreprocessing, Serializable, SharedSetup, TestCase, - VerifierPreprocessing, F, FS, PCS, -}; +pub use guests::{JoltDevice, ProofVerifyError, TestCase}; pub use invariant::{ CheckError, Invariant, InvariantTargets, InvariantViolation, JoltInvariants, SynthesisTarget, }; @@ -21,32 +17,3 @@ pub use objective::{AbstractObjective, Direction, MeasurementError, Objective}; // Re-exports used by the #[invariant] proc macro generated code. pub use arbitrary; pub use rand; - -/// Run all provided invariants, returning results keyed by name. -pub fn check_all_invariants( - invariants: &[JoltInvariants], - num_random: usize, -) -> std::collections::HashMap>> { - invariants - .iter() - .map(|inv| { - let name = inv.name().to_string(); - let results = inv.run_checks(num_random); - (name, results) - }) - .collect() -} - -/// Measure all provided objectives, returning results keyed by name. 
-pub fn measure_all_objectives( - objectives: &[Objective], -) -> std::collections::HashMap> { - objectives - .iter() - .map(|obj| { - let name = obj.name().to_string(); - let result = obj.collect_measurement(); - (name, result) - }) - .collect() -} diff --git a/jolt-eval/src/objective/comment_density.rs b/jolt-eval/src/objective/comment_density.rs new file mode 100644 index 000000000..620646eea --- /dev/null +++ b/jolt-eval/src/objective/comment_density.rs @@ -0,0 +1,39 @@ +use std::path::{Path, PathBuf}; + +use super::lloc::TokeiStats; +use super::{AbstractObjective, Direction, MeasurementError}; + +/// Comment density (comments / code) across `jolt-core/src/`. +/// +/// Higher is better — more documentation relative to code. +pub struct CommentDensityObjective { + root: PathBuf, +} + +impl CommentDensityObjective { + pub fn new(root: &Path) -> Self { + Self { + root: root.to_path_buf(), + } + } +} + +impl AbstractObjective for CommentDensityObjective { + fn name(&self) -> &str { + "comment_density" + } + + fn collect_measurement(&self) -> Result { + let TokeiStats { + code, comments, .. + } = super::lloc::tokei_rust_stats(&self.root.join("jolt-core/src"))?; + if code == 0 { + return Ok(0.0); + } + Ok(comments as f64 / code as f64) + } + + fn direction(&self) -> Direction { + Direction::Maximize + } +} diff --git a/jolt-eval/src/objective/guest_cycles.rs b/jolt-eval/src/objective/guest_cycles.rs deleted file mode 100644 index 40ff488a3..000000000 --- a/jolt-eval/src/objective/guest_cycles.rs +++ /dev/null @@ -1,36 +0,0 @@ -use std::sync::Arc; - -use super::{AbstractObjective, Direction, MeasurementError}; -use crate::TestCase; - -/// Measures guest instruction cycle count via program tracing. 
-pub struct GuestCycleCountObjective { - pub test_case: Arc, - pub inputs: Vec, -} - -impl GuestCycleCountObjective { - pub fn new(test_case: Arc, inputs: Vec) -> Self { - Self { test_case, inputs } - } -} - -impl AbstractObjective for GuestCycleCountObjective { - fn name(&self) -> &str { - "guest_cycle_count" - } - - fn collect_measurement(&self) -> Result { - let program = self.test_case.make_program(); - let (_lazy_trace, trace, _memory, _io) = program.trace(&self.inputs, &[], &[]); - Ok(trace.len() as f64) - } - - fn recommended_samples(&self) -> usize { - 1 - } - - fn direction(&self) -> Direction { - Direction::Minimize - } -} diff --git a/jolt-eval/src/objective/inline_lengths.rs b/jolt-eval/src/objective/inline_lengths.rs deleted file mode 100644 index 54136a3b1..000000000 --- a/jolt-eval/src/objective/inline_lengths.rs +++ /dev/null @@ -1,47 +0,0 @@ -use std::sync::Arc; - -use super::{AbstractObjective, Direction, MeasurementError}; -use crate::TestCase; - -/// Measures total virtual/inline sequence length in the decoded bytecode. -/// -/// Inline sequences replace guest-side computation with constraint-native -/// implementations, so their total length reflects how much of the program -/// is handled by optimized inline instructions. 
-pub struct InlineLengthsObjective { - pub test_case: Arc, -} - -impl InlineLengthsObjective { - pub fn new(test_case: Arc) -> Self { - Self { test_case } - } -} - -impl AbstractObjective for InlineLengthsObjective { - fn name(&self) -> &str { - "inline_lengths" - } - - fn collect_measurement(&self) -> Result { - let program = self.test_case.make_program(); - let (instructions, _memory_init, _program_size, _entry) = program.decode(); - - // Count INLINE instructions (optimized constraint-native operations) - let total_inline_length: usize = instructions - .iter() - .filter(|instr| matches!(instr, tracer::instruction::Instruction::INLINE(_))) - .count(); - - Ok(total_inline_length as f64) - } - - fn recommended_samples(&self) -> usize { - 1 - } - - fn direction(&self) -> Direction { - // More inlines generally means more efficient execution - Direction::Maximize - } -} diff --git a/jolt-eval/src/objective/lloc.rs b/jolt-eval/src/objective/lloc.rs new file mode 100644 index 000000000..825325d5b --- /dev/null +++ b/jolt-eval/src/objective/lloc.rs @@ -0,0 +1,68 @@ +use std::path::{Path, PathBuf}; +use std::process::Command; + +use super::{AbstractObjective, Direction, MeasurementError}; + +/// Total lines of Rust code (excluding comments and blanks) across +/// `jolt-core/src/`, as reported by `tokei`. +pub struct LlocObjective { + root: PathBuf, +} + +impl LlocObjective { + pub fn new(root: &Path) -> Self { + Self { + root: root.to_path_buf(), + } + } +} + +impl AbstractObjective for LlocObjective { + fn name(&self) -> &str { + "lloc" + } + + fn collect_measurement(&self) -> Result { + let stats = tokei_rust_stats(&self.root.join("jolt-core/src"))?; + Ok(stats.code as f64) + } + + fn direction(&self) -> Direction { + Direction::Minimize + } +} + +pub(crate) struct TokeiStats { + pub code: u64, + pub comments: u64, +} + +/// Run `tokei --type Rust -o json` on a directory and parse the result. 
+pub(crate) fn tokei_rust_stats(dir: &Path) -> Result { + let output = Command::new("tokei") + .arg(dir) + .args(["--type", "Rust", "-o", "json"]) + .output() + .map_err(|e| { + MeasurementError::new(format!( + "tokei: {e}. Install via: cargo install tokei" + )) + })?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(MeasurementError::new(format!("tokei failed: {stderr}"))); + } + + let json: serde_json::Value = serde_json::from_slice(&output.stdout) + .map_err(|e| MeasurementError::new(format!("tokei JSON parse: {e}")))?; + + let rust = json + .get("Rust") + .ok_or_else(|| MeasurementError::new("no Rust section in tokei output"))?; + + Ok(TokeiStats { + code: rust["code"].as_u64().unwrap_or(0), + comments: rust["comments"].as_u64().unwrap_or(0), + }) +} diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 153962bae..1788d48c5 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -1,16 +1,9 @@ -pub mod guest_cycles; -pub mod inline_lengths; +pub mod comment_density; +pub mod lloc; pub mod optimize; -pub mod peak_rss; -pub mod proof_size; -pub mod prover_time; -pub mod verifier_time; -pub mod wrapping_cost; -use std::collections::HashMap; use std::fmt; - -use crate::SharedSetup; +use std::path::Path; /// Whether lower or higher values are better. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -44,195 +37,42 @@ impl MeasurementError { /// Core objective trait for measurable properties. pub trait AbstractObjective: Send + Sync { fn name(&self) -> &str; - - /// Take a single measurement and return its scalar value. fn collect_measurement(&self) -> Result; - - /// How many samples to take for statistical significance. - fn recommended_samples(&self) -> usize { - 1 - } - - /// What threshold is considered a regression (e.g. 0.05 = 5% slowdown). 
- fn regression_threshold(&self) -> Option { - None - } - fn direction(&self) -> Direction; } -pub struct ObjectiveEntry { - pub name: &'static str, - pub direction: Direction, - /// Whether this objective requires a compiled guest program. - pub needs_guest: bool, - pub build: fn(Option<&SharedSetup>, Vec) -> Box, -} - -/// All registered objective entries. -pub fn registered_objectives() -> impl Iterator { - [ - ObjectiveEntry { - name: "peak_rss", - direction: Direction::Minimize, - needs_guest: true, - build: |s, inputs| { - let setup = s.unwrap(); - Box::new(peak_rss::PeakRssObjective::new( - setup.test_case.clone(), - setup.prover_preprocessing.clone(), - inputs, - )) - }, - }, - ObjectiveEntry { - name: "prover_time", - direction: Direction::Minimize, - needs_guest: true, - build: |s, inputs| { - let setup = s.unwrap(); - Box::new(prover_time::ProverTimeObjective::new( - setup.test_case.clone(), - setup.prover_preprocessing.clone(), - inputs, - )) - }, - }, - ObjectiveEntry { - name: "proof_size", - direction: Direction::Minimize, - needs_guest: true, - build: |s, inputs| { - let setup = s.unwrap(); - Box::new(proof_size::ProofSizeObjective::new( - setup.test_case.clone(), - setup.prover_preprocessing.clone(), - inputs, - )) - }, - }, - ObjectiveEntry { - name: "verifier_time", - direction: Direction::Minimize, - needs_guest: true, - build: |s, inputs| { - let setup = s.unwrap(); - Box::new(verifier_time::VerifierTimeObjective::new( - setup.test_case.clone(), - setup.prover_preprocessing.clone(), - setup.verifier_preprocessing.clone(), - inputs, - )) - }, - }, - ObjectiveEntry { - name: "guest_cycle_count", - direction: Direction::Minimize, - needs_guest: true, - build: |s, inputs| { - let setup = s.unwrap(); - Box::new(guest_cycles::GuestCycleCountObjective::new( - setup.test_case.clone(), - inputs, - )) - }, - }, - ObjectiveEntry { - name: "inline_lengths", - direction: Direction::Maximize, - needs_guest: true, - build: |s, _inputs| { - let setup = 
s.unwrap(); - Box::new(inline_lengths::InlineLengthsObjective::new( - setup.test_case.clone(), - )) - }, - }, - ObjectiveEntry { - name: "wrapping_cost", - direction: Direction::Minimize, - needs_guest: true, - build: |s, _inputs| { - let setup = s.unwrap(); - Box::new(wrapping_cost::WrappingCostObjective::new( - setup.test_case.clone(), - setup.prover_preprocessing.clone(), - )) - }, - }, - ] - .into_iter() -} - -/// Build all registered objectives from a [`SharedSetup`]. -/// -/// Pass `None` to include only objectives that don't require a guest. -pub fn build_objectives_from_inventory( - setup: Option<&SharedSetup>, - inputs: Vec, -) -> Vec> { - registered_objectives() - .filter(|entry| !entry.needs_guest || setup.is_some()) - .map(|entry| (entry.build)(setup, inputs.clone())) - .collect() -} - -/// Measure a list of trait-object objectives. -pub fn measure_dyn(objectives: &[Box]) -> HashMap { - objectives - .iter() - .filter_map(|obj| { - let name = obj.name().to_string(); - obj.collect_measurement().ok().map(|v| (name, v)) - }) - .collect() -} - /// Centralized objective enum dispatching to concrete implementations. 
pub enum Objective { - PeakRss(peak_rss::PeakRssObjective), - ProverTime(prover_time::ProverTimeObjective), - ProofSize(proof_size::ProofSizeObjective), - VerifierTime(verifier_time::VerifierTimeObjective), - GuestCycleCount(guest_cycles::GuestCycleCountObjective), - InlineLengths(inline_lengths::InlineLengthsObjective), - WrappingCost(wrapping_cost::WrappingCostObjective), + Lloc(lloc::LlocObjective), + CommentDensity(comment_density::CommentDensityObjective), } impl Objective { + pub fn all(root: &Path) -> Vec { + vec![ + Self::Lloc(lloc::LlocObjective::new(root)), + Self::CommentDensity(comment_density::CommentDensityObjective::new(root)), + ] + } + pub fn name(&self) -> &str { match self { - Self::PeakRss(o) => o.name(), - Self::ProverTime(o) => o.name(), - Self::ProofSize(o) => o.name(), - Self::VerifierTime(o) => o.name(), - Self::GuestCycleCount(o) => o.name(), - Self::InlineLengths(o) => o.name(), - Self::WrappingCost(o) => o.name(), + Self::Lloc(o) => o.name(), + Self::CommentDensity(o) => o.name(), } } pub fn collect_measurement(&self) -> Result { match self { - Self::PeakRss(o) => o.collect_measurement(), - Self::ProverTime(o) => o.collect_measurement(), - Self::ProofSize(o) => o.collect_measurement(), - Self::VerifierTime(o) => o.collect_measurement(), - Self::GuestCycleCount(o) => o.collect_measurement(), - Self::InlineLengths(o) => o.collect_measurement(), - Self::WrappingCost(o) => o.collect_measurement(), + Self::Lloc(o) => o.collect_measurement(), + Self::CommentDensity(o) => o.collect_measurement(), } } pub fn direction(&self) -> Direction { match self { - Self::PeakRss(o) => o.direction(), - Self::ProverTime(o) => o.direction(), - Self::ProofSize(o) => o.direction(), - Self::VerifierTime(o) => o.direction(), - Self::GuestCycleCount(o) => o.direction(), - Self::InlineLengths(o) => o.direction(), - Self::WrappingCost(o) => o.direction(), + Self::Lloc(o) => o.direction(), + Self::CommentDensity(o) => o.direction(), } } } @@ -241,17 +81,6 @@ impl 
Objective { pub struct OptimizationAttempt { pub description: String, pub diff: String, - pub measurements: HashMap, + pub measurements: std::collections::HashMap, pub invariants_passed: bool, } - -/// Measure all objectives and return a map of name -> value. -pub fn measure_objectives(objectives: &[Objective]) -> HashMap { - objectives - .iter() - .filter_map(|obj| { - let name = obj.name().to_string(); - obj.collect_measurement().ok().map(|v| (name, v)) - }) - .collect() -} diff --git a/jolt-eval/src/objective/peak_rss.rs b/jolt-eval/src/objective/peak_rss.rs deleted file mode 100644 index ca49f924a..000000000 --- a/jolt-eval/src/objective/peak_rss.rs +++ /dev/null @@ -1,67 +0,0 @@ -use std::sync::Arc; - -use sysinfo::{Pid, System}; - -use super::{AbstractObjective, Direction, MeasurementError}; -use crate::{ProverPreprocessing, TestCase}; - -/// Measures peak resident set size (RSS) during proving. -/// -/// Uses the `sysinfo` crate to sample memory before and after proving. -/// For more accurate results, run in an isolated process. 
-pub struct PeakRssObjective { - pub test_case: Arc, - pub prover_preprocessing: Arc, - pub inputs: Vec, -} - -impl PeakRssObjective { - pub fn new( - test_case: Arc, - prover_preprocessing: Arc, - inputs: Vec, - ) -> Self { - Self { - test_case, - prover_preprocessing, - inputs, - } - } -} - -impl AbstractObjective for PeakRssObjective { - fn name(&self) -> &str { - "peak_rss" - } - - fn collect_measurement(&self) -> Result { - let pid = Pid::from_u32(std::process::id()); - let mut sys = System::new(); - - sys.refresh_processes(sysinfo::ProcessesToUpdate::Some(&[pid]), true); - let rss_before = sys.process(pid).map(|p| p.memory()).unwrap_or(0); - - let (_proof, _io) = self - .test_case - .prove(&self.prover_preprocessing, &self.inputs); - - sys.refresh_processes(sysinfo::ProcessesToUpdate::Some(&[pid]), true); - let rss_after = sys.process(pid).map(|p| p.memory()).unwrap_or(0); - - // Report peak RSS in megabytes - let peak_mb = rss_after.max(rss_before) as f64 / (1024.0 * 1024.0); - Ok(peak_mb) - } - - fn recommended_samples(&self) -> usize { - 1 - } - - fn regression_threshold(&self) -> Option { - Some(0.10) - } - - fn direction(&self) -> Direction { - Direction::Minimize - } -} diff --git a/jolt-eval/src/objective/proof_size.rs b/jolt-eval/src/objective/proof_size.rs deleted file mode 100644 index 29211c2db..000000000 --- a/jolt-eval/src/objective/proof_size.rs +++ /dev/null @@ -1,47 +0,0 @@ -use std::sync::Arc; - -use super::{AbstractObjective, Direction, MeasurementError}; -use crate::{serialize_proof, ProverPreprocessing, TestCase}; - -/// Measures serialized proof size in bytes. 
-pub struct ProofSizeObjective { - pub test_case: Arc, - pub prover_preprocessing: Arc, - pub inputs: Vec, -} - -impl ProofSizeObjective { - pub fn new( - test_case: Arc, - prover_preprocessing: Arc, - inputs: Vec, - ) -> Self { - Self { - test_case, - prover_preprocessing, - inputs, - } - } -} - -impl AbstractObjective for ProofSizeObjective { - fn name(&self) -> &str { - "proof_size" - } - - fn collect_measurement(&self) -> Result { - let (proof, _io) = self - .test_case - .prove(&self.prover_preprocessing, &self.inputs); - let bytes = serialize_proof(&proof); - Ok(bytes.len() as f64) - } - - fn recommended_samples(&self) -> usize { - 1 - } - - fn direction(&self) -> Direction { - Direction::Minimize - } -} diff --git a/jolt-eval/src/objective/prover_time.rs b/jolt-eval/src/objective/prover_time.rs deleted file mode 100644 index 7b839f576..000000000 --- a/jolt-eval/src/objective/prover_time.rs +++ /dev/null @@ -1,52 +0,0 @@ -use std::sync::Arc; -use std::time::Instant; - -use super::{AbstractObjective, Direction, MeasurementError}; -use crate::{ProverPreprocessing, TestCase}; - -/// Measures wall-clock prover time in seconds. 
-pub struct ProverTimeObjective { - pub test_case: Arc, - pub prover_preprocessing: Arc, - pub inputs: Vec, -} - -impl ProverTimeObjective { - pub fn new( - test_case: Arc, - prover_preprocessing: Arc, - inputs: Vec, - ) -> Self { - Self { - test_case, - prover_preprocessing, - inputs, - } - } -} - -impl AbstractObjective for ProverTimeObjective { - fn name(&self) -> &str { - "prover_time" - } - - fn collect_measurement(&self) -> Result { - let start = Instant::now(); - let (_proof, _io) = self - .test_case - .prove(&self.prover_preprocessing, &self.inputs); - Ok(start.elapsed().as_secs_f64()) - } - - fn recommended_samples(&self) -> usize { - 3 - } - - fn regression_threshold(&self) -> Option { - Some(0.05) - } - - fn direction(&self) -> Direction { - Direction::Minimize - } -} diff --git a/jolt-eval/src/objective/verifier_time.rs b/jolt-eval/src/objective/verifier_time.rs deleted file mode 100644 index 1223f95a9..000000000 --- a/jolt-eval/src/objective/verifier_time.rs +++ /dev/null @@ -1,58 +0,0 @@ -use std::sync::Arc; -use std::time::Instant; - -use super::{AbstractObjective, Direction, MeasurementError}; -use crate::{ProverPreprocessing, TestCase, VerifierPreprocessing}; - -/// Measures wall-clock verifier time in seconds. 
-pub struct VerifierTimeObjective { - pub test_case: Arc, - pub prover_preprocessing: Arc, - pub verifier_preprocessing: Arc, - pub inputs: Vec, -} - -impl VerifierTimeObjective { - pub fn new( - test_case: Arc, - prover_preprocessing: Arc, - verifier_preprocessing: Arc, - inputs: Vec, - ) -> Self { - Self { - test_case, - prover_preprocessing, - verifier_preprocessing, - inputs, - } - } -} - -impl AbstractObjective for VerifierTimeObjective { - fn name(&self) -> &str { - "verifier_time" - } - - fn collect_measurement(&self) -> Result { - let (proof, io_device) = self - .test_case - .prove(&self.prover_preprocessing, &self.inputs); - - let start = Instant::now(); - TestCase::verify(&self.verifier_preprocessing, proof, &io_device) - .map_err(|e| MeasurementError::new(format!("Verification failed: {e}")))?; - Ok(start.elapsed().as_secs_f64()) - } - - fn recommended_samples(&self) -> usize { - 5 - } - - fn regression_threshold(&self) -> Option { - Some(0.05) - } - - fn direction(&self) -> Direction { - Direction::Minimize - } -} diff --git a/jolt-eval/src/objective/wrapping_cost.rs b/jolt-eval/src/objective/wrapping_cost.rs deleted file mode 100644 index dfcddb924..000000000 --- a/jolt-eval/src/objective/wrapping_cost.rs +++ /dev/null @@ -1,43 +0,0 @@ -use std::sync::Arc; - -use super::{AbstractObjective, Direction, MeasurementError}; -use crate::{ProverPreprocessing, TestCase}; - -/// Measures the "wrapping cost" as the total number of constraints in the R1CS. -/// -/// This is derived from the preprocessing data which encodes the constraint -/// structure. Lower constraint counts mean cheaper verification. 
-pub struct WrappingCostObjective { - pub test_case: Arc, - pub prover_preprocessing: Arc, -} - -impl WrappingCostObjective { - pub fn new(test_case: Arc, prover_preprocessing: Arc) -> Self { - Self { - test_case, - prover_preprocessing, - } - } -} - -impl AbstractObjective for WrappingCostObjective { - fn name(&self) -> &str { - "wrapping_cost" - } - - fn collect_measurement(&self) -> Result { - // The padded trace length from preprocessing reflects the constraint - // system size, which is the dominant factor in wrapping cost. - let max_padded = self.prover_preprocessing.shared.max_padded_trace_length; - Ok(max_padded as f64) - } - - fn recommended_samples(&self) -> usize { - 1 - } - - fn direction(&self) -> Direction { - Direction::Minimize - } -} diff --git a/jolt-eval/tests/integration.rs b/jolt-eval/tests/integration.rs index 23517e49f..46c9ccdd0 100644 --- a/jolt-eval/tests/integration.rs +++ b/jolt-eval/tests/integration.rs @@ -107,24 +107,22 @@ fn test_constant_objective() { } #[test] -fn test_measure_objectives() { - use jolt_eval::objective::measure_dyn; - - let objectives: Vec> = vec![ - Box::new(ConstantObjective { - label: "prover_time", - value: 3.125, - direction: Direction::Minimize, - }), - Box::new(ConstantObjective { - label: "inline_count", - value: 256.0, - direction: Direction::Maximize, - }), - ]; - - let results = measure_dyn(&objectives); - assert_eq!(results.len(), 2); - assert_eq!(results["prover_time"], 3.125); - assert_eq!(results["inline_count"], 256.0); +fn test_lloc_objective() { + let root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap(); + let obj = jolt_eval::objective::lloc::LlocObjective::new(root); + let lloc = obj.collect_measurement().unwrap(); + assert!(lloc > 1000.0, "LLOC should be > 1000, got {lloc}"); +} + +#[test] +fn test_comment_density_objective() { + let root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap(); + let obj = 
jolt_eval::objective::comment_density::CommentDensityObjective::new(root); + let density = obj.collect_measurement().unwrap(); + assert!(density > 0.0, "comment density should be > 0, got {density}"); + assert!(density < 1.0, "comment density should be < 1, got {density}"); } From 9b025b572cb4cb633948a6e89bece2c2f6478f08 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 19:49:07 -0400 Subject: [PATCH 32/86] refactor(jolt-eval): use rust-code-analysis for code quality objectives Replace tokei-based objectives with rust-code-analysis v0.0.24 library. Three objectives, each with unit tests: - lloc: total logical lines of code (minimize) - cognitive_complexity_avg: average cognitive complexity per function, measures how hard code is to understand (minimize) - halstead_bugs: estimated delivered bugs based on Halstead program volume (minimize) Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 186 ++++++++++++++++++++- jolt-eval/Cargo.toml | 2 + jolt-eval/src/objective/cognitive.rs | 93 +++++++++++ jolt-eval/src/objective/comment_density.rs | 39 ----- jolt-eval/src/objective/halstead_bugs.rs | 80 +++++++++ jolt-eval/src/objective/lloc.rs | 94 +++++++---- jolt-eval/src/objective/mod.rs | 18 +- jolt-eval/tests/integration.rs | 22 +-- 8 files changed, 439 insertions(+), 95 deletions(-) create mode 100644 jolt-eval/src/objective/cognitive.rs delete mode 100644 jolt-eval/src/objective/comment_density.rs create mode 100644 jolt-eval/src/objective/halstead_bugs.rs diff --git a/Cargo.lock b/Cargo.lock index ccadbcbc7..24cca2b72 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -65,6 +65,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "aho-corasick" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +dependencies = [ + "memchr", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -2205,6 +2214,15 @@ dependencies = [ "slab", ] 
+[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -2918,7 +2936,7 @@ dependencies = [ "jolt-optimizations", "memory-stats", "num", - "num-derive", + "num-derive 0.4.2", "num-traits", "postcard", "pprof", @@ -2954,6 +2972,7 @@ dependencies = [ "jolt-eval-macros", "postcard", "rand 0.8.5", + "rust-code-analysis", "schemars 0.8.22", "serde", "serde_json", @@ -3549,6 +3568,17 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +[[package]] +name = "num-derive" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "num-derive" version = "0.4.2" @@ -4576,7 +4606,7 @@ version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ - "aho-corasick", + "aho-corasick 1.1.4", "memchr", "regex-automata", "regex-syntax", @@ -4588,7 +4618,7 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ - "aho-corasick", + "aho-corasick 1.1.4", "memchr", "regex-syntax", ] @@ -4861,6 +4891,35 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48fd7bd8a6377e15ad9d42a8ec25371b94ddc67abe7c8b9127bec79bebaaae18" +[[package]] +name = "rust-code-analysis" +version = "0.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"92a0f85e044428a7b58538f95fa58a157d89d5bcc5b37df6e7024957e52bdc5a" +dependencies = [ + "aho-corasick 0.7.20", + "fxhash", + "lazy_static", + "num", + "num-derive 0.3.3", + "num-format", + "num-traits", + "petgraph", + "regex", + "serde", + "termcolor", + "tree-sitter", + "tree-sitter-ccomment", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-mozcpp", + "tree-sitter-mozjs", + "tree-sitter-preproc", + "tree-sitter-python", + "tree-sitter-rust", + "tree-sitter-typescript", +] + [[package]] name = "rustc-demangle" version = "0.1.27" @@ -5734,6 +5793,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "text-block-macros" version = "0.2.0" @@ -6067,6 +6135,118 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "tree-sitter" +version = "0.19.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f41201fed3db3b520405a9c01c61773a250d4c3f43e9861c14b2bb232c981ab" +dependencies = [ + "cc", + "regex", +] + +[[package]] +name = "tree-sitter-ccomment" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3b402bc539927bb457e5ab59aac7260e2c3b97c5fcfc043575788654eedd69a" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7bd90c7b7db59369ed00fbc40458d9c9b2b8ed145640e337e839ac07aa63e15" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-java" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301ae2ee7813e1bf935dc06db947642400645bbea8878431e1b31131488d5430" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = 
"tree-sitter-javascript" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "840bb4d5f3c384cb76b976ff07297f5a24b6e61a708baa4464f53e395caaa5f9" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-mozcpp" +version = "0.19.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5439f32b7685af19efcd0165d28dab80261e1cc922ed259c9c7909c96ac4cc6" +dependencies = [ + "cc", + "tree-sitter", + "tree-sitter-cpp", +] + +[[package]] +name = "tree-sitter-mozjs" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "def6b21c10157d3d79b912191fa4549008885da827451a62be9f30abeb7319c8" +dependencies = [ + "cc", + "tree-sitter", + "tree-sitter-javascript", +] + +[[package]] +name = "tree-sitter-preproc" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "226b2a77578e83efa7a193919660ffc88c22e357f9c2d9f27b5b11898a8682d3" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-python" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5646bfe71c4eb1c21b714ce0c38334c311eab767095582859e85da6281e9fd6c" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784f7ef9cdbd4c895dc2d4bb785e95b4a5364a602eec803681db83d1927ddf15" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3f62d49c6e56bf291c412ee5e178ea14dff40f14a5f01a8847933f56d65bf3b" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "try-lock" version = "0.2.5" diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index 4081be8dd..c4c4484a3 100644 --- a/jolt-eval/Cargo.toml +++ 
b/jolt-eval/Cargo.toml @@ -23,6 +23,8 @@ enumset = "1" schemars = "0.8" tempfile = "3" +rust-code-analysis = "0.0.24" + jolt-eval-macros = { path = "macros" } [[bin]] diff --git a/jolt-eval/src/objective/cognitive.rs b/jolt-eval/src/objective/cognitive.rs new file mode 100644 index 000000000..8d07f5ce8 --- /dev/null +++ b/jolt-eval/src/objective/cognitive.rs @@ -0,0 +1,93 @@ +use std::path::{Path, PathBuf}; + +use rust_code_analysis::FuncSpace; + +use super::lloc::{analyze_rust_file, rust_files}; +use super::{AbstractObjective, Direction, MeasurementError}; + +/// Average cognitive complexity per function across all Rust files under +/// `jolt-core/src/`. +/// +/// Cognitive complexity measures how difficult code is to understand, +/// penalizing deeply nested control flow, recursion, and breaks in +/// linear flow. Lower is better. +pub struct CognitiveComplexityObjective { + root: PathBuf, +} + +impl CognitiveComplexityObjective { + pub fn new(root: &Path) -> Self { + Self { + root: root.to_path_buf(), + } + } +} + +impl AbstractObjective for CognitiveComplexityObjective { + fn name(&self) -> &str { + "cognitive_complexity_avg" + } + + fn collect_measurement(&self) -> Result { + let src_dir = self.root.join("jolt-core/src"); + let mut total = 0.0; + let mut count = 0usize; + for path in rust_files(&src_dir)? { + if let Some(space) = analyze_rust_file(&path) { + collect_leaf_cognitive(&space, &mut total, &mut count); + } + } + if count == 0 { + return Ok(0.0); + } + Ok(total / count as f64) + } + + fn direction(&self) -> Direction { + Direction::Minimize + } +} + +/// Walk the function-space tree and collect cognitive complexity from +/// leaf functions (functions with no child spaces). 
+fn collect_leaf_cognitive(space: &FuncSpace, total: &mut f64, count: &mut usize) { + if space.spaces.is_empty() { + let c = space.metrics.cognitive.cognitive(); + if c > 0.0 { + *total += c; + *count += 1; + } + } else { + for child in &space.spaces { + collect_leaf_cognitive(child, total, count); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn cognitive_on_jolt_core() { + let root = Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap(); + let obj = CognitiveComplexityObjective::new(root); + let val = obj.collect_measurement().unwrap(); + assert!(val > 0.0, "avg cognitive should be > 0, got {val}"); + assert!(val < 100.0, "avg cognitive should be < 100, got {val}"); + } + + #[test] + fn cognitive_on_single_file() { + let source = b"fn simple() { let x = 1; }".to_vec(); + let path = Path::new("test.rs"); + let space = + rust_code_analysis::get_function_spaces(&rust_code_analysis::LANG::Rust, source, path, None) + .unwrap(); + let mut total = 0.0; + let mut count = 0; + collect_leaf_cognitive(&space, &mut total, &mut count); + // A straight-line function has 0 cognitive complexity + assert_eq!(total, 0.0); + } +} diff --git a/jolt-eval/src/objective/comment_density.rs b/jolt-eval/src/objective/comment_density.rs deleted file mode 100644 index 620646eea..000000000 --- a/jolt-eval/src/objective/comment_density.rs +++ /dev/null @@ -1,39 +0,0 @@ -use std::path::{Path, PathBuf}; - -use super::lloc::TokeiStats; -use super::{AbstractObjective, Direction, MeasurementError}; - -/// Comment density (comments / code) across `jolt-core/src/`. -/// -/// Higher is better — more documentation relative to code. 
-pub struct CommentDensityObjective { - root: PathBuf, -} - -impl CommentDensityObjective { - pub fn new(root: &Path) -> Self { - Self { - root: root.to_path_buf(), - } - } -} - -impl AbstractObjective for CommentDensityObjective { - fn name(&self) -> &str { - "comment_density" - } - - fn collect_measurement(&self) -> Result { - let TokeiStats { - code, comments, .. - } = super::lloc::tokei_rust_stats(&self.root.join("jolt-core/src"))?; - if code == 0 { - return Ok(0.0); - } - Ok(comments as f64 / code as f64) - } - - fn direction(&self) -> Direction { - Direction::Maximize - } -} diff --git a/jolt-eval/src/objective/halstead_bugs.rs b/jolt-eval/src/objective/halstead_bugs.rs new file mode 100644 index 000000000..0b3b6ae60 --- /dev/null +++ b/jolt-eval/src/objective/halstead_bugs.rs @@ -0,0 +1,80 @@ +use std::path::{Path, PathBuf}; + +use rust_code_analysis::FuncSpace; + +use super::lloc::{analyze_rust_file, rust_files}; +use super::{AbstractObjective, Direction, MeasurementError}; + +/// Estimated number of delivered bugs across all Rust files under +/// `jolt-core/src/`, based on Halstead's bug prediction formula +/// (B = V / 3000, where V is program volume). +/// +/// Lower is better. +pub struct HalsteadBugsObjective { + root: PathBuf, +} + +impl HalsteadBugsObjective { + pub fn new(root: &Path) -> Self { + Self { + root: root.to_path_buf(), + } + } +} + +impl AbstractObjective for HalsteadBugsObjective { + fn name(&self) -> &str { + "halstead_bugs" + } + + fn collect_measurement(&self) -> Result { + let src_dir = self.root.join("jolt-core/src"); + let mut total = 0.0; + for path in rust_files(&src_dir)? { + if let Some(space) = analyze_rust_file(&path) { + total += sum_bugs(&space); + } + } + Ok(total) + } + + fn direction(&self) -> Direction { + Direction::Minimize + } +} + +/// Sum Halstead bugs across all function spaces in the tree, +/// skipping NaN values (empty functions produce 0/0). 
+fn sum_bugs(space: &FuncSpace) -> f64 { + let b = space.metrics.halstead.bugs(); + let mut total = if b.is_finite() { b } else { 0.0 }; + for child in &space.spaces { + total += sum_bugs(child); + } + total +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn halstead_bugs_on_jolt_core() { + let root = Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap(); + let obj = HalsteadBugsObjective::new(root); + let val = obj.collect_measurement().unwrap(); + assert!(val > 0.0, "halstead bugs should be > 0, got {val}"); + } + + #[test] + fn halstead_bugs_on_trivial_code() { + let source = b"fn f() { let x = 1 + 2; }".to_vec(); + let path = Path::new("test.rs"); + let space = + rust_code_analysis::get_function_spaces(&rust_code_analysis::LANG::Rust, source, path, None) + .unwrap(); + let bugs = sum_bugs(&space); + // Trivial code should have very low estimated bugs + assert!(bugs < 1.0, "trivial code bugs should be < 1, got {bugs}"); + } +} diff --git a/jolt-eval/src/objective/lloc.rs b/jolt-eval/src/objective/lloc.rs index 825325d5b..2d27d2a62 100644 --- a/jolt-eval/src/objective/lloc.rs +++ b/jolt-eval/src/objective/lloc.rs @@ -1,10 +1,11 @@ use std::path::{Path, PathBuf}; -use std::process::Command; + +use rust_code_analysis::{get_function_spaces, FuncSpace, LANG}; use super::{AbstractObjective, Direction, MeasurementError}; -/// Total lines of Rust code (excluding comments and blanks) across -/// `jolt-core/src/`, as reported by `tokei`. +/// Total logical lines of code (LLOC) across all Rust files under +/// `jolt-core/src/`. pub struct LlocObjective { root: PathBuf, } @@ -23,8 +24,14 @@ impl AbstractObjective for LlocObjective { } fn collect_measurement(&self) -> Result { - let stats = tokei_rust_stats(&self.root.join("jolt-core/src"))?; - Ok(stats.code as f64) + let src_dir = self.root.join("jolt-core/src"); + let mut total = 0.0; + for path in rust_files(&src_dir)? 
{ + if let Some(space) = analyze_rust_file(&path) { + total += space.metrics.loc.lloc(); + } + } + Ok(total) } fn direction(&self) -> Direction { @@ -32,37 +39,60 @@ impl AbstractObjective for LlocObjective { } } -pub(crate) struct TokeiStats { - pub code: u64, - pub comments: u64, +pub(crate) fn rust_files(dir: &Path) -> Result, MeasurementError> { + let mut files = Vec::new(); + walk_rust_files(dir, &mut files) + .map_err(|e| MeasurementError::new(format!("walking {}: {e}", dir.display())))?; + Ok(files) } -/// Run `tokei --type Rust -o json` on a directory and parse the result. -pub(crate) fn tokei_rust_stats(dir: &Path) -> Result { - let output = Command::new("tokei") - .arg(dir) - .args(["--type", "Rust", "-o", "json"]) - .output() - .map_err(|e| { - MeasurementError::new(format!( - "tokei: {e}. Install via: cargo install tokei" - )) - })?; - - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - return Err(MeasurementError::new(format!("tokei failed: {stderr}"))); +fn walk_rust_files(dir: &Path, out: &mut Vec) -> std::io::Result<()> { + if !dir.is_dir() { + return Ok(()); } + for entry in std::fs::read_dir(dir)? 
{ + let entry = entry?; + let path = entry.path(); + if path.is_dir() { + walk_rust_files(&path, out)?; + } else if path.extension().is_some_and(|e| e == "rs") { + out.push(path); + } + } + Ok(()) +} - let json: serde_json::Value = serde_json::from_slice(&output.stdout) - .map_err(|e| MeasurementError::new(format!("tokei JSON parse: {e}")))?; +pub(crate) fn analyze_rust_file(path: &Path) -> Option { + let source = std::fs::read(path).ok()?; + get_function_spaces(&LANG::Rust, source, path, None) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn lloc_on_jolt_core() { + let root = Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap(); + let obj = LlocObjective::new(root); + let val = obj.collect_measurement().unwrap(); + assert!(val > 1000.0, "LLOC should be > 1000, got {val}"); + } - let rust = json - .get("Rust") - .ok_or_else(|| MeasurementError::new("no Rust section in tokei output"))?; + #[test] + fn lloc_on_inline_source() { + let source = b"fn f() { let x = 1; let y = 2; }".to_vec(); + let path = Path::new("test.rs"); + let space = get_function_spaces(&LANG::Rust, source, path, None).unwrap(); + let lloc = space.metrics.loc.lloc(); + assert!(lloc >= 2.0, "two statements should give lloc >= 2, got {lloc}"); + } - Ok(TokeiStats { - code: rust["code"].as_u64().unwrap_or(0), - comments: rust["comments"].as_u64().unwrap_or(0), - }) + #[test] + fn rust_files_finds_rs_files() { + let src = Path::new(env!("CARGO_MANIFEST_DIR")).join("src"); + let files = rust_files(&src).unwrap(); + assert!(!files.is_empty()); + assert!(files.iter().all(|f| f.extension().unwrap() == "rs")); + } } diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 1788d48c5..05411806b 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -1,4 +1,5 @@ -pub mod comment_density; +pub mod cognitive; +pub mod halstead_bugs; pub mod lloc; pub mod optimize; @@ -44,35 +45,40 @@ pub trait AbstractObjective: Send + Sync { /// 
Centralized objective enum dispatching to concrete implementations. pub enum Objective { Lloc(lloc::LlocObjective), - CommentDensity(comment_density::CommentDensityObjective), + CognitiveComplexity(cognitive::CognitiveComplexityObjective), + HalsteadBugs(halstead_bugs::HalsteadBugsObjective), } impl Objective { pub fn all(root: &Path) -> Vec { vec![ Self::Lloc(lloc::LlocObjective::new(root)), - Self::CommentDensity(comment_density::CommentDensityObjective::new(root)), + Self::CognitiveComplexity(cognitive::CognitiveComplexityObjective::new(root)), + Self::HalsteadBugs(halstead_bugs::HalsteadBugsObjective::new(root)), ] } pub fn name(&self) -> &str { match self { Self::Lloc(o) => o.name(), - Self::CommentDensity(o) => o.name(), + Self::CognitiveComplexity(o) => o.name(), + Self::HalsteadBugs(o) => o.name(), } } pub fn collect_measurement(&self) -> Result { match self { Self::Lloc(o) => o.collect_measurement(), - Self::CommentDensity(o) => o.collect_measurement(), + Self::CognitiveComplexity(o) => o.collect_measurement(), + Self::HalsteadBugs(o) => o.collect_measurement(), } } pub fn direction(&self) -> Direction { match self { Self::Lloc(o) => o.direction(), - Self::CommentDensity(o) => o.direction(), + Self::CognitiveComplexity(o) => o.direction(), + Self::HalsteadBugs(o) => o.direction(), } } } diff --git a/jolt-eval/tests/integration.rs b/jolt-eval/tests/integration.rs index 46c9ccdd0..5b48d9f87 100644 --- a/jolt-eval/tests/integration.rs +++ b/jolt-eval/tests/integration.rs @@ -107,22 +107,14 @@ fn test_constant_objective() { } #[test] -fn test_lloc_objective() { +fn test_objective_all() { let root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) .parent() .unwrap(); - let obj = jolt_eval::objective::lloc::LlocObjective::new(root); - let lloc = obj.collect_measurement().unwrap(); - assert!(lloc > 1000.0, "LLOC should be > 1000, got {lloc}"); -} - -#[test] -fn test_comment_density_objective() { - let root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) - 
.parent() - .unwrap(); - let obj = jolt_eval::objective::comment_density::CommentDensityObjective::new(root); - let density = obj.collect_measurement().unwrap(); - assert!(density > 0.0, "comment density should be > 0, got {density}"); - assert!(density < 1.0, "comment density should be < 1, got {density}"); + let objectives = jolt_eval::Objective::all(root); + assert_eq!(objectives.len(), 3); + for obj in &objectives { + let val = obj.collect_measurement().unwrap(); + assert!(val > 0.0, "{} should be > 0, got {val}", obj.name()); + } } From 2dd2fb63df1dbe1646541336beb8759822141308 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 20:11:21 -0400 Subject: [PATCH 33/86] Tweak redteam prompt --- jolt-eval/src/invariant/synthesis/redteam.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index 39364e9a0..b2bb6d08b 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -189,7 +189,11 @@ fn build_redteam_prompt( 3. Construct a concrete counterexample input that you believe will trigger \ a violation.\n\ 4. The input will be deserialized and checked mechanically — only genuine \ - violations count.\n\n", + violations count.\n\n\ + IMPORTANT: Do NOT ask questions or request clarification. Do NOT create \ + plans or outlines. You MUST produce a concrete counterexample JSON on \ + every iteration. Spend your turns reading code and reasoning, then output \ + your best counterexample. 
A wrong guess is always better than no guess.\n\n", ); if let Some(example) = input_example { @@ -225,9 +229,12 @@ fn build_redteam_prompt( prompt.push_str( "## Required output\n\n\ - Respond with a JSON object containing:\n\ - - `analysis`: your reasoning and what you investigated\n\ - - `counterexample`: the candidate input matching the schema above\n", + You MUST respond with a JSON object containing exactly two fields:\n\ + - `analysis`: a brief summary of what you investigated and why you \ + chose this counterexample\n\ + - `counterexample`: the candidate input matching the schema above\n\n\ + Do NOT respond with anything other than this JSON object. No questions, \ + no plans, no markdown outside the JSON.\n", ); prompt From 7ac4bfc52aacab8282839f27d97523a3e3587855 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 20:31:57 -0400 Subject: [PATCH 34/86] Tweak soundness invariant description --- jolt-eval/src/guests/mod.rs | 5 +- jolt-eval/src/invariant/soundness.rs | 124 +++++++++++++++++---------- 2 files changed, 80 insertions(+), 49 deletions(-) diff --git a/jolt-eval/src/guests/mod.rs b/jolt-eval/src/guests/mod.rs index 82fb80255..de65e3052 100644 --- a/jolt-eval/src/guests/mod.rs +++ b/jolt-eval/src/guests/mod.rs @@ -23,7 +23,6 @@ pub type VerifierPreprocessing = jolt_core::zkvm::verifier::JoltVerifierPreproce pub struct TestCase { pub elf_contents: Vec, pub memory_config: common::jolt_device::MemoryConfig, - pub max_trace_length: usize, } impl TestCase { @@ -31,9 +30,9 @@ impl TestCase { GuestProgram::new(&self.elf_contents, &self.memory_config) } - pub fn prover_preprocessing(&self) -> ProverPreprocessing { + pub fn prover_preprocessing(&self, max_trace_length: usize) -> ProverPreprocessing { let program = self.make_program(); - jolt_core::guest::prover::preprocess(&program, self.max_trace_length) + jolt_core::guest::prover::preprocess(&program, max_trace_length) .expect("prover preprocessing failed") } diff --git 
a/jolt-eval/src/invariant/soundness.rs b/jolt-eval/src/invariant/soundness.rs index d0da6c5ec..9670496dc 100644 --- a/jolt-eval/src/invariant/soundness.rs +++ b/jolt-eval/src/invariant/soundness.rs @@ -20,7 +20,6 @@ pub struct GuestMemoryConfig { pub max_output_size: u64, pub stack_size: u64, pub heap_size: u64, - pub max_trace_length: usize, } impl Default for GuestMemoryConfig { @@ -30,18 +29,16 @@ impl Default for GuestMemoryConfig { max_output_size: 4096, stack_size: 65536, heap_size: 32768, - max_trace_length: 1048576, } } } -/// Maximum allowed values for memory config parameters to prevent -/// the red-team agent from requesting absurd resource usage. +/// Maximum allowed values for memory config parameters. const MAX_INPUT_SIZE: u64 = 1 << 16; const MAX_OUTPUT_SIZE: u64 = 1 << 16; const MAX_STACK_SIZE: u64 = 1 << 16; const MAX_HEAP_SIZE: u64 = 1 << 20; -const MAX_TRACE_LENGTH: usize = 1 << 20; // ~1M steps +const MAX_TRACE_LENGTH: usize = 1 << 20; impl GuestMemoryConfig { pub fn validate(&self) -> Result<(), CheckError> { @@ -49,19 +46,12 @@ impl GuestMemoryConfig { || self.max_output_size > MAX_OUTPUT_SIZE || self.stack_size > MAX_STACK_SIZE || self.heap_size > MAX_HEAP_SIZE - || self.max_trace_length > MAX_TRACE_LENGTH { return Err(CheckError::InvalidInput(format!( "memory config exceeds limits: \ - input={}, output={}, stack={}, heap={}, trace={}; \ - limits: input/output/stack/heap<={}, trace<={}", - self.max_input_size, - self.max_output_size, - self.stack_size, - self.heap_size, - self.max_trace_length, - MAX_HEAP_SIZE, - MAX_TRACE_LENGTH, + input={}, output={}, stack={}, heap={}; \ + limits: input/output/stack<={MAX_STACK_SIZE}, heap<={MAX_HEAP_SIZE}", + self.max_input_size, self.max_output_size, self.stack_size, self.heap_size, ))); } Ok(()) @@ -129,11 +119,31 @@ impl Invariant for SoundnessInvariant { } fn description(&self) -> String { - "For any deterministic guest program (no advice) and fixed input, \ - there is only one (output, panic) pair 
that the verifier accepts. \ - A counterexample is a guest patch + input + dishonest (output, panic) \ - claim that the verifier incorrectly accepts." - .to_string() + format!( + "For any deterministic guest program (no advice) and fixed input, \ + there is only one (output, panic) pair that the verifier accepts. \ + A counterexample is a guest patch + input + dishonest (output, panic) \ + claim that the verifier incorrectly accepts. \ + For full context, read the invariant file: jolt-eval/src/invariant/soundness.rs \n\n\ + ### Guest sandbox\n\n\ + The guest template is at `jolt-eval/guest-sandbox/`. It contains:\n\ + - `Cargo.toml` — depends on `jolt-sdk`\n\ + - `src/lib.rs` — the `#[jolt::provable]` function (main patch target)\n\ + - `src/main.rs` — no_main entry point (rarely needs patching)\n\n\ + ### Producing a patch\n\n\ + To produce the `patch` field, modify files inside `jolt-eval/guest-sandbox/` \ + and run `git diff` **from the `jolt-eval/guest-sandbox/` directory**:\n\ + ```\n\ + cd jolt-eval/guest-sandbox && git diff\n\ + ```\n\ + The patch is applied with `git apply` from the same directory. \ + Hunks referencing paths with `..` are filtered out.\n\n\ + ### Limits\n\n\ + Memory config: max_input_size <= {MAX_INPUT_SIZE}, \ + max_output_size <= {MAX_OUTPUT_SIZE}, \ + stack_size <= {MAX_STACK_SIZE}, heap_size <= {MAX_HEAP_SIZE}. \ + The program's execution trace must not exceed {MAX_TRACE_LENGTH} cycles." + ) } fn setup(&self) -> SoundnessSetup { @@ -143,11 +153,7 @@ impl Invariant for SoundnessInvariant { } } - fn check( - &self, - setup: &SoundnessSetup, - input: SoundnessInput, - ) -> Result<(), CheckError> { + fn check(&self, setup: &SoundnessSetup, input: SoundnessInput) -> Result<(), CheckError> { // 1. Validate memory config input.memory.validate()?; let memory_config = input.memory.to_memory_config(); @@ -160,13 +166,27 @@ impl Invariant for SoundnessInvariant { // _guard drops here (or on early return), reverting the patch - // 4. 
Build a TestCase and prove - let test_case = TestCase { + // 4. Trace to determine actual trace length, then prove + let mut test_case = TestCase { elf_contents: elf_bytes, memory_config, - max_trace_length: input.memory.max_trace_length, }; - let prover_pp = test_case.prover_preprocessing(); + let (_bytecode, _memory_init, program_size, _e_entry) = + jolt_core::guest::program::decode(&test_case.elf_contents); + test_case.memory_config.program_size = Some(program_size); + + let program = test_case.make_program(); + let (_lazy_trace, trace, _memory, _io) = program.trace(&input.program_input, &[], &[]); + let max_trace_length = (trace.len() + 1).next_power_of_two(); + drop(trace); + + if max_trace_length > MAX_TRACE_LENGTH { + return Err(CheckError::InvalidInput(format!( + "trace length {max_trace_length} exceeds limit {MAX_TRACE_LENGTH}" + ))); + } + + let prover_pp = test_case.prover_preprocessing(max_trace_length); let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); let (proof, honest_device) = test_case.prove(&prover_pp, &input.program_input); @@ -298,10 +318,7 @@ pub fn filter_patch(patch: &str) -> String { /// Compile the sandbox guest and return the ELF bytes. /// /// `Program::build` panics on compilation failure, so we catch it. 
-fn compile_guest( - sandbox_dir: &Path, - memory_config: &MemoryConfig, -) -> Result, CheckError> { +fn compile_guest(sandbox_dir: &Path, memory_config: &MemoryConfig) -> Result, CheckError> { let target_dir = sandbox_dir.join("target").to_string_lossy().to_string(); let mc = *memory_config; let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { @@ -409,31 +426,37 @@ diff --git a/Cargo.toml b/Cargo.toml #[test] fn validate_rejects_oversized_input() { - let c = GuestMemoryConfig { max_input_size: u64::MAX, ..Default::default() }; + let c = GuestMemoryConfig { + max_input_size: u64::MAX, + ..Default::default() + }; assert!(matches!(c.validate(), Err(CheckError::InvalidInput(_)))); } #[test] fn validate_rejects_oversized_output() { - let c = GuestMemoryConfig { max_output_size: u64::MAX, ..Default::default() }; + let c = GuestMemoryConfig { + max_output_size: u64::MAX, + ..Default::default() + }; assert!(matches!(c.validate(), Err(CheckError::InvalidInput(_)))); } #[test] fn validate_rejects_oversized_stack() { - let c = GuestMemoryConfig { stack_size: u64::MAX, ..Default::default() }; + let c = GuestMemoryConfig { + stack_size: u64::MAX, + ..Default::default() + }; assert!(matches!(c.validate(), Err(CheckError::InvalidInput(_)))); } #[test] fn validate_rejects_oversized_heap() { - let c = GuestMemoryConfig { heap_size: u64::MAX, ..Default::default() }; - assert!(matches!(c.validate(), Err(CheckError::InvalidInput(_)))); - } - - #[test] - fn validate_rejects_oversized_trace() { - let c = GuestMemoryConfig { max_trace_length: usize::MAX, ..Default::default() }; + let c = GuestMemoryConfig { + heap_size: u64::MAX, + ..Default::default() + }; assert!(matches!(c.validate(), Err(CheckError::InvalidInput(_)))); } @@ -442,10 +465,16 @@ diff --git a/Cargo.toml b/Cargo.toml let inv = SoundnessInvariant; let setup = inv.setup(); let input = SoundnessInput { - memory: GuestMemoryConfig { heap_size: u64::MAX, ..Default::default() }, + memory: GuestMemoryConfig { 
+ heap_size: u64::MAX, + ..Default::default() + }, ..default_input() }; - assert!(matches!(inv.check(&setup, input), Err(CheckError::InvalidInput(_)))); + assert!(matches!( + inv.check(&setup, input), + Err(CheckError::InvalidInput(_)) + )); } // ── patching ──────────────────────────────────────────────────── @@ -508,6 +537,9 @@ diff --git a/../../etc/passwd b/../../etc/passwd claimed_panic: false, ..default_input() }; - assert!(matches!(inv.check(&setup, input), Err(CheckError::InvalidInput(_)))); + assert!(matches!( + inv.check(&setup, input), + Err(CheckError::InvalidInput(_)) + )); } } From cb683ca432d9182aa023bf07f95bb27bfa982eb2 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 2 Apr 2026 21:03:24 -0400 Subject: [PATCH 35/86] refactor(jolt-eval): remove TestCase, add measure/optimize binaries Replace TestCase struct with free functions in guests/ module (prover_preprocessing, verifier_preprocessing, prove, verify, verify_with_claims). Soundness invariant now uses GuestProgram directly and derives trace length from actual execution. Add measure-objectives and optimize binaries back. Use generics instead of dyn for auto_optimize parameters. Add trace length limit (2^20) to soundness invariant and document all limits in the invariant description. Also strengthen redteam prompt to prevent the agent from asking questions or producing plans instead of counterexamples. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/Cargo.toml | 8 ++ jolt-eval/bin/measure_objectives.rs | 61 +++++++++ jolt-eval/bin/optimize.rs | 186 +++++++++++++++++++++++++++ jolt-eval/src/guests/mod.rs | 143 ++++++++++---------- jolt-eval/src/invariant/soundness.rs | 29 ++--- jolt-eval/src/lib.rs | 2 +- jolt-eval/src/objective/optimize.rs | 6 +- 7 files changed, 338 insertions(+), 97 deletions(-) create mode 100644 jolt-eval/bin/measure_objectives.rs create mode 100644 jolt-eval/bin/optimize.rs diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index c4c4484a3..2ff401aad 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -27,6 +27,14 @@ rust-code-analysis = "0.0.24" jolt-eval-macros = { path = "macros" } +[[bin]] +name = "measure-objectives" +path = "bin/measure_objectives.rs" + +[[bin]] +name = "optimize" +path = "bin/optimize.rs" + [[bin]] name = "redteam" path = "bin/redteam.rs" diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs new file mode 100644 index 000000000..fbf2ecaa1 --- /dev/null +++ b/jolt-eval/bin/measure_objectives.rs @@ -0,0 +1,61 @@ +use clap::Parser; + +use jolt_eval::objective::Objective; + +#[derive(Parser)] +#[command(name = "measure-objectives")] +#[command(about = "Measure Jolt code quality and performance objectives")] +struct Cli { + /// Only measure the named objective (default: all) + #[arg(long)] + objective: Option, +} + +fn main() -> eyre::Result<()> { + tracing_subscriber::fmt::init(); + let cli = Cli::parse(); + + let repo_root = std::env::current_dir()?; + let objectives = Objective::all(&repo_root); + + let filtered: Vec<_> = if let Some(name) = &cli.objective { + objectives + .into_iter() + .filter(|o| o.name() == name.as_str()) + .collect() + } else { + objectives + }; + + if filtered.is_empty() { + let all_names: Vec<_> = Objective::all(&repo_root) + .iter() + .map(|o| o.name().to_string()) + .collect(); + eprintln!( + "No matching objectives. 
Available: {}", + all_names.join(", ") + ); + std::process::exit(1); + } + + println!("{:<30} {:>15} {:>10}", "Objective", "Value", "Direction"); + println!("{}", "-".repeat(57)); + + for obj in &filtered { + match obj.collect_measurement() { + Ok(val) => { + let dir = match obj.direction() { + jolt_eval::Direction::Minimize => "min", + jolt_eval::Direction::Maximize => "max", + }; + println!("{:<30} {:>15.2} {:>10}", obj.name(), val, dir); + } + Err(e) => { + println!("{:<30} {:>15}", obj.name(), format!("ERROR: {e}")); + } + } + } + + Ok(()) +} diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs new file mode 100644 index 000000000..4db879e59 --- /dev/null +++ b/jolt-eval/bin/optimize.rs @@ -0,0 +1,186 @@ +use std::collections::HashMap; +use std::process::Command; + +use clap::Parser; + +use jolt_eval::agent::ClaudeCodeAgent; +use jolt_eval::invariant::JoltInvariants; +use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; +use jolt_eval::objective::{Direction, Objective}; + +#[derive(Parser)] +#[command(name = "optimize")] +#[command(about = "AI-driven optimization of Jolt codebase objectives")] +struct Cli { + /// Objectives to optimize (comma-separated). Default: all. 
+ #[arg(long)] + objectives: Option, + + /// Number of optimization iterations + #[arg(long, default_value = "5")] + iterations: usize, + + /// AI model to use + #[arg(long, default_value = "claude-sonnet-4-20250514")] + model: String, + + /// Maximum number of Claude agentic turns per iteration + #[arg(long, default_value = "30")] + max_turns: usize, + + /// Extra context to include in the optimization prompt + #[arg(long)] + hint: Option, +} + +struct RealEnv { + objectives: Vec, + invariants: Vec, + repo_dir: std::path::PathBuf, +} + +impl OptimizeEnv for RealEnv { + fn measure(&mut self) -> HashMap { + self.objectives + .iter() + .filter_map(|o| { + let name = o.name().to_string(); + o.collect_measurement().ok().map(|v| (name, v)) + }) + .collect() + } + + fn check_invariants(&mut self) -> bool { + self.invariants.iter().all(|inv| { + let results = inv.run_checks(0); + results.iter().all(|r| r.is_ok()) + }) + } + + fn directions(&self) -> HashMap { + self.objectives + .iter() + .map(|o| (o.name().to_string(), o.direction())) + .collect() + } + + fn apply_diff(&mut self, diff: &str) { + if let Err(e) = jolt_eval::agent::apply_diff(&self.repo_dir, diff) { + tracing::warn!("Failed to apply diff: {e}"); + } + } + + fn accept(&mut self, iteration: usize) { + println!(" Improvement found -- keeping changes."); + let _ = Command::new("git") + .current_dir(&self.repo_dir) + .args(["add", "-A"]) + .status(); + let msg = format!("perf(auto-optimize): iteration {iteration}"); + let _ = Command::new("git") + .current_dir(&self.repo_dir) + .args(["commit", "-m", &msg, "--allow-empty"]) + .status(); + } + + fn reject(&mut self) { + println!(" Reverting changes."); + let _ = Command::new("git") + .current_dir(&self.repo_dir) + .args(["checkout", "."]) + .status(); + } +} + +fn main() -> eyre::Result<()> { + tracing_subscriber::fmt::init(); + let cli = Cli::parse(); + + let repo_dir = std::env::current_dir()?; + let all_objectives = Objective::all(&repo_dir); + let all_names: 
Vec = all_objectives + .iter() + .map(|o| o.name().to_string()) + .collect(); + + let filter_names: Option> = cli + .objectives + .as_ref() + .map(|s| s.split(',').map(|n| n.trim().to_string()).collect()); + + let objectives: Vec = if let Some(names) = &filter_names { + all_objectives + .into_iter() + .filter(|o| names.contains(&o.name().to_string())) + .collect() + } else { + all_objectives + }; + + if objectives.is_empty() { + eprintln!( + "No matching objectives. Available: {}", + all_names.join(", ") + ); + std::process::exit(1); + } + + let invariants = JoltInvariants::all(); + + let mut env = RealEnv { + objectives, + invariants, + repo_dir: repo_dir.clone(), + }; + + println!("=== Baseline measurements ==="); + let baseline = env.measure(); + print_measurements(&env.directions(), &baseline); + println!(); + + let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); + let config = OptimizeConfig { + num_iterations: cli.iterations, + hint: cli.hint.clone(), + }; + + let result = auto_optimize(&agent, &mut env, &config, &repo_dir); + + println!("=== Optimization summary ==="); + println!( + "{}/{} iterations produced improvements.", + result + .attempts + .iter() + .filter(|a| a.invariants_passed + && a.measurements + .iter() + .any(|(name, &val)| { result.baseline.get(name).is_some_and(|&b| val < b) })) + .count(), + result.attempts.len() + ); + println!(); + println!("Final measurements:"); + print_measurements(&env.directions(), &result.best); + + Ok(()) +} + +fn print_measurements( + directions: &HashMap, + measurements: &HashMap, +) { + let mut names: Vec<_> = directions.keys().collect(); + names.sort(); + for name in names { + let val = measurements + .get(name) + .map(|v| format!("{v:.4}")) + .unwrap_or_else(|| "N/A".to_string()); + let dir = match directions[name] { + Direction::Minimize => "min", + Direction::Maximize => "max", + }; + println!(" {:<30} {:>15} {:>6}", name, val, dir); + } +} diff --git a/jolt-eval/src/guests/mod.rs 
b/jolt-eval/src/guests/mod.rs index de65e3052..336e0e188 100644 --- a/jolt-eval/src/guests/mod.rs +++ b/jolt-eval/src/guests/mod.rs @@ -16,88 +16,77 @@ pub type Proof = jolt_core::zkvm::proof_serialization::JoltProof; pub type ProverPreprocessing = jolt_core::zkvm::prover::JoltProverPreprocessing; pub type VerifierPreprocessing = jolt_core::zkvm::verifier::JoltVerifierPreprocessing; -/// A self-contained test case wrapping a compiled guest program. -/// -/// Stores raw ELF bytes and memory configuration so it can reconstruct -/// a `GuestProgram` on demand without requiring `Clone` on the program. -pub struct TestCase { - pub elf_contents: Vec, - pub memory_config: common::jolt_device::MemoryConfig, +pub fn prover_preprocessing( + program: &GuestProgram, + max_trace_length: usize, +) -> ProverPreprocessing { + jolt_core::guest::prover::preprocess(program, max_trace_length) + .expect("prover preprocessing failed") } -impl TestCase { - pub fn make_program(&self) -> GuestProgram { - GuestProgram::new(&self.elf_contents, &self.memory_config) - } - - pub fn prover_preprocessing(&self, max_trace_length: usize) -> ProverPreprocessing { - let program = self.make_program(); - jolt_core::guest::prover::preprocess(&program, max_trace_length) - .expect("prover preprocessing failed") - } - - pub fn verifier_preprocessing(prover_pp: &ProverPreprocessing) -> VerifierPreprocessing { - VerifierPreprocessing::from(prover_pp) - } +pub fn verifier_preprocessing(prover_pp: &ProverPreprocessing) -> VerifierPreprocessing { + VerifierPreprocessing::from(prover_pp) +} - pub fn prove(&self, prover_pp: &ProverPreprocessing, inputs: &[u8]) -> (Proof, JoltDevice) { - let program = self.make_program(); - let mut output_bytes = vec![0u8; self.memory_config.max_output_size as usize]; - let (proof, io_device, _debug) = jolt_core::guest::prover::prove::( - &program, - inputs, - &[], - &[], - None, - None, - &mut output_bytes, - prover_pp, - ); - (proof, io_device) - } +pub fn prove( + program: 
&GuestProgram, + prover_pp: &ProverPreprocessing, + inputs: &[u8], +) -> (Proof, JoltDevice) { + let mut output_bytes = vec![0u8; program.memory_config.max_output_size as usize]; + let (proof, io_device, _debug) = jolt_core::guest::prover::prove::( + program, + inputs, + &[], + &[], + None, + None, + &mut output_bytes, + prover_pp, + ); + (proof, io_device) +} - pub fn verify( - verifier_pp: &VerifierPreprocessing, - proof: Proof, - io_device: &JoltDevice, - ) -> Result<(), ProofVerifyError> { - jolt_core::guest::verifier::verify::( - &io_device.inputs, - None, - &io_device.outputs, - proof, - verifier_pp, - ) - } +pub fn verify( + verifier_pp: &VerifierPreprocessing, + proof: Proof, + io_device: &JoltDevice, +) -> Result<(), ProofVerifyError> { + jolt_core::guest::verifier::verify::( + &io_device.inputs, + None, + &io_device.outputs, + proof, + verifier_pp, + ) +} - /// Verify a proof against claimed (potentially malicious) outputs and panic flag. - pub fn verify_with_claims( - verifier_pp: &VerifierPreprocessing, - proof: Proof, - inputs: &[u8], - claimed_outputs: &[u8], - claimed_panic: bool, - ) -> Result<(), ProofVerifyError> { - use common::jolt_device::MemoryConfig; - use jolt_core::zkvm::verifier::JoltVerifier; +/// Verify a proof against claimed (potentially malicious) outputs and panic flag. 
+pub fn verify_with_claims( + verifier_pp: &VerifierPreprocessing, + proof: Proof, + inputs: &[u8], + claimed_outputs: &[u8], + claimed_panic: bool, +) -> Result<(), ProofVerifyError> { + use common::jolt_device::MemoryConfig; + use jolt_core::zkvm::verifier::JoltVerifier; - let memory_layout = &verifier_pp.shared.memory_layout; - let memory_config = MemoryConfig { - max_untrusted_advice_size: memory_layout.max_untrusted_advice_size, - max_trusted_advice_size: memory_layout.max_trusted_advice_size, - max_input_size: memory_layout.max_input_size, - max_output_size: memory_layout.max_output_size, - stack_size: memory_layout.stack_size, - heap_size: memory_layout.heap_size, - program_size: Some(memory_layout.program_size), - }; - let mut io_device = JoltDevice::new(&memory_config); - io_device.inputs = inputs.to_vec(); - io_device.outputs = claimed_outputs.to_vec(); - io_device.panic = claimed_panic; + let memory_layout = &verifier_pp.shared.memory_layout; + let memory_config = MemoryConfig { + max_untrusted_advice_size: memory_layout.max_untrusted_advice_size, + max_trusted_advice_size: memory_layout.max_trusted_advice_size, + max_input_size: memory_layout.max_input_size, + max_output_size: memory_layout.max_output_size, + stack_size: memory_layout.stack_size, + heap_size: memory_layout.heap_size, + program_size: Some(memory_layout.program_size), + }; + let mut io_device = JoltDevice::new(&memory_config); + io_device.inputs = inputs.to_vec(); + io_device.outputs = claimed_outputs.to_vec(); + io_device.panic = claimed_panic; - let verifier = - JoltVerifier::::new(verifier_pp, proof, io_device, None, None)?; - verifier.verify() - } + let verifier = JoltVerifier::::new(verifier_pp, proof, io_device, None, None)?; + verifier.verify() } diff --git a/jolt-eval/src/invariant/soundness.rs b/jolt-eval/src/invariant/soundness.rs index 9670496dc..d73695299 100644 --- a/jolt-eval/src/invariant/soundness.rs +++ b/jolt-eval/src/invariant/soundness.rs @@ -8,7 +8,7 @@ use 
common::jolt_device::MemoryConfig; use jolt_core::host::Program; use super::{CheckError, Invariant, InvariantViolation}; -use crate::TestCase; +use crate::guests; /// Guest memory layout parameters. /// @@ -156,7 +156,7 @@ impl Invariant for SoundnessInvariant { fn check(&self, setup: &SoundnessSetup, input: SoundnessInput) -> Result<(), CheckError> { // 1. Validate memory config input.memory.validate()?; - let memory_config = input.memory.to_memory_config(); + let mut memory_config = input.memory.to_memory_config(); // 2. Apply patch to sandbox in-place, revert on exit let _guard = apply_patch(&setup.sandbox_dir, &input.patch)?; @@ -166,16 +166,12 @@ impl Invariant for SoundnessInvariant { // _guard drops here (or on early return), reverting the patch - // 4. Trace to determine actual trace length, then prove - let mut test_case = TestCase { - elf_contents: elf_bytes, - memory_config, - }; + // 4. Decode to get program_size, then trace to get actual length let (_bytecode, _memory_init, program_size, _e_entry) = - jolt_core::guest::program::decode(&test_case.elf_contents); - test_case.memory_config.program_size = Some(program_size); + jolt_core::guest::program::decode(&elf_bytes); + memory_config.program_size = Some(program_size); - let program = test_case.make_program(); + let program = guests::GuestProgram::new(&elf_bytes, &memory_config); let (_lazy_trace, trace, _memory, _io) = program.trace(&input.program_input, &[], &[]); let max_trace_length = (trace.len() + 1).next_power_of_two(); drop(trace); @@ -186,11 +182,12 @@ impl Invariant for SoundnessInvariant { ))); } - let prover_pp = test_case.prover_preprocessing(max_trace_length); - let verifier_pp = TestCase::verifier_preprocessing(&prover_pp); - let (proof, honest_device) = test_case.prove(&prover_pp, &input.program_input); + // 5. 
Prove and verify + let prover_pp = guests::prover_preprocessing(&program, max_trace_length); + let verifier_pp = guests::verifier_preprocessing(&prover_pp); + let (proof, honest_device) = guests::prove(&program, &prover_pp, &input.program_input); - // 5. Skip no-op claims (the claim matches the honest execution) + // 6. Skip no-op claims (the claim matches the honest execution) if input.claimed_output == honest_device.outputs && input.claimed_panic == honest_device.panic { @@ -199,8 +196,8 @@ impl Invariant for SoundnessInvariant { )); } - // 6. Verify with the dishonest claim — this SHOULD fail - match TestCase::verify_with_claims( + // 7. Verify with the dishonest claim — this SHOULD fail + match guests::verify_with_claims( &verifier_pp, proof, &honest_device.inputs, diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index f266f9486..8bd6a1b77 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -8,7 +8,7 @@ pub mod guests; pub mod invariant; pub mod objective; -pub use guests::{JoltDevice, ProofVerifyError, TestCase}; +pub use guests::{GuestProgram, JoltDevice, ProofVerifyError}; pub use invariant::{ CheckError, Invariant, InvariantTargets, InvariantViolation, JoltInvariants, SynthesisTarget, }; diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index 96c5fca58..20e3e0786 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -59,9 +59,9 @@ pub trait OptimizeEnv { /// 3. If the agent produced a diff, applies it via [`OptimizeEnv::apply_diff`]. /// 4. Re-measures objectives and checks invariants. /// 5. Accepts or rejects the change. 
-pub fn auto_optimize( - agent: &dyn AgentHarness, - env: &mut dyn OptimizeEnv, +pub fn auto_optimize( + agent: &A, + env: &mut E, config: &OptimizeConfig, repo_dir: &Path, ) -> OptimizeResult { From 3697139ebdd6ac513f770786ecb7a70920b9df77 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 10:37:49 -0400 Subject: [PATCH 36/86] feat(jolt-eval): Criterion integration for perf objectives, sync script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add PerfObjective trait (setup/run split for Criterion's b.iter loop) and bench_objective! macro that generates Criterion harness from a type, mirroring fuzz_invariant! for invariants. Rewrite bind_bench.rs as PerfObjective impls. Remove perf objectives from the Objective enum — perf measurements come from Criterion output. measure-objectives reads target/criterion/*/new/estimates.json after running cargo bench. Add sync_targets.sh that scans source for #[invariant(...Fuzz...)] and PerfObjective impls, generates/removes fuzz targets and bench files, and updates Cargo.toml entries. Idempotent. Add benches/results symlink to target/criterion/ for easy access to Criterion HTML reports. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 1 + jolt-eval/Cargo.toml | 11 + .../benches/bind_parallel_high_to_low.rs | 2 + .../benches/bind_parallel_low_to_high.rs | 2 + jolt-eval/benches/results | 1 + jolt-eval/bin/measure_objectives.rs | 101 +++-- jolt-eval/fuzz/Cargo.lock | 348 ++++++++++++------ jolt-eval/fuzz/Cargo.toml | 9 +- jolt-eval/src/lib.rs | 2 +- jolt-eval/src/objective/bind_bench.rs | 100 +++++ jolt-eval/src/objective/mod.rs | 32 +- jolt-eval/src/objective/synthesis.rs | 29 ++ jolt-eval/sync_targets.sh | 172 +++++++++ 13 files changed, 659 insertions(+), 151 deletions(-) create mode 100644 jolt-eval/benches/bind_parallel_high_to_low.rs create mode 100644 jolt-eval/benches/bind_parallel_low_to_high.rs create mode 120000 jolt-eval/benches/results create mode 100644 jolt-eval/src/objective/bind_bench.rs create mode 100644 jolt-eval/src/objective/synthesis.rs create mode 100755 jolt-eval/sync_targets.sh diff --git a/Cargo.lock b/Cargo.lock index 24cca2b72..701f3f15b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2966,6 +2966,7 @@ dependencies = [ "ark-bn254", "clap", "common", + "criterion", "enumset", "eyre", "jolt-core", diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index 2ff401aad..d2495b1b2 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -27,6 +27,17 @@ rust-code-analysis = "0.0.24" jolt-eval-macros = { path = "macros" } +[dev-dependencies] +criterion = { workspace = true } + +[[bench]] +name = "bind_parallel_high_to_low" +harness = false + +[[bench]] +name = "bind_parallel_low_to_high" +harness = false + [[bin]] name = "measure-objectives" path = "bin/measure_objectives.rs" diff --git a/jolt-eval/benches/bind_parallel_high_to_low.rs b/jolt-eval/benches/bind_parallel_high_to_low.rs new file mode 100644 index 000000000..d3f0bb55d --- /dev/null +++ b/jolt-eval/benches/bind_parallel_high_to_low.rs @@ -0,0 +1,2 @@ +use jolt_eval::objective::bind_bench::BindHighToLowObjective; 
+jolt_eval::bench_objective!(BindHighToLowObjective); diff --git a/jolt-eval/benches/bind_parallel_low_to_high.rs b/jolt-eval/benches/bind_parallel_low_to_high.rs new file mode 100644 index 000000000..5c640d6d2 --- /dev/null +++ b/jolt-eval/benches/bind_parallel_low_to_high.rs @@ -0,0 +1,2 @@ +use jolt_eval::objective::bind_bench::BindLowToHighObjective; +jolt_eval::bench_objective!(BindLowToHighObjective); diff --git a/jolt-eval/benches/results b/jolt-eval/benches/results new file mode 120000 index 000000000..32ab80389 --- /dev/null +++ b/jolt-eval/benches/results @@ -0,0 +1 @@ +../../target/criterion \ No newline at end of file diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs index fbf2ecaa1..08bc9dbde 100644 --- a/jolt-eval/bin/measure_objectives.rs +++ b/jolt-eval/bin/measure_objectives.rs @@ -1,6 +1,8 @@ +use std::path::Path; + use clap::Parser; -use jolt_eval::objective::Objective; +use jolt_eval::objective::{perf_objective_names, Objective}; #[derive(Parser)] #[command(name = "measure-objectives")] @@ -9,6 +11,10 @@ struct Cli { /// Only measure the named objective (default: all) #[arg(long)] objective: Option, + + /// Skip Criterion benchmarks (only show static-analysis objectives) + #[arg(long)] + no_bench: bool, } fn main() -> eyre::Result<()> { @@ -16,46 +22,85 @@ fn main() -> eyre::Result<()> { let cli = Cli::parse(); let repo_root = std::env::current_dir()?; - let objectives = Objective::all(&repo_root); - let filtered: Vec<_> = if let Some(name) = &cli.objective { - objectives - .into_iter() - .filter(|o| o.name() == name.as_str()) - .collect() - } else { - objectives - }; - - if filtered.is_empty() { - let all_names: Vec<_> = Objective::all(&repo_root) - .iter() - .map(|o| o.name().to_string()) - .collect(); - eprintln!( - "No matching objectives. 
Available: {}", - all_names.join(", ") - ); - std::process::exit(1); - } + println!("{:<35} {:>15} {:>10}", "Objective", "Value", "Direction"); + println!("{}", "-".repeat(62)); - println!("{:<30} {:>15} {:>10}", "Objective", "Value", "Direction"); - println!("{}", "-".repeat(57)); - - for obj in &filtered { + // Static-analysis objectives + let objectives = Objective::all(&repo_root); + for obj in &objectives { + if let Some(ref name) = cli.objective { + if obj.name() != name.as_str() { + continue; + } + } match obj.collect_measurement() { Ok(val) => { let dir = match obj.direction() { jolt_eval::Direction::Minimize => "min", jolt_eval::Direction::Maximize => "max", }; - println!("{:<30} {:>15.2} {:>10}", obj.name(), val, dir); + println!("{:<35} {:>15.6} {:>10}", obj.name(), val, dir); } Err(e) => { - println!("{:<30} {:>15}", obj.name(), format!("ERROR: {e}")); + println!("{:<35} {:>15}", obj.name(), format!("ERROR: {e}")); + } + } + } + + // Performance objectives (from Criterion) + if !cli.no_bench { + let perf_names = perf_objective_names(); + let run_bench = cli + .objective + .as_ref() + .is_none_or(|name| perf_names.contains(&name.as_str())); + + if run_bench { + eprintln!("Running Criterion benchmarks..."); + let status = std::process::Command::new("cargo") + .args(["bench", "-p", "jolt-eval"]) + .status(); + + match status { + Ok(s) if s.success() => { + for &name in perf_names { + if let Some(ref filter) = cli.objective { + if name != filter.as_str() { + continue; + } + } + match read_criterion_estimate(name) { + Some(secs) => { + println!("{:<35} {:>15.6} {:>10}", name, secs, "min"); + } + None => { + println!("{:<35} {:>15}", name, "NO DATA"); + } + } + } + } + _ => { + eprintln!("cargo bench failed; skipping perf objectives"); + } } } } Ok(()) } + +/// Read the point estimate (mean, in seconds) from Criterion's output. +/// +/// Criterion writes to `target/criterion/<bench_name>/new/estimates.json`.
+fn read_criterion_estimate(bench_name: &str) -> Option { + let path = Path::new("target/criterion") + .join(bench_name) + .join("new") + .join("estimates.json"); + let data = std::fs::read_to_string(path).ok()?; + let json: serde_json::Value = serde_json::from_str(&data).ok()?; + // Criterion stores times in nanoseconds + let nanos = json.get("mean")?.get("point_estimate")?.as_f64()?; + Some(nanos / 1e9) +} diff --git a/jolt-eval/fuzz/Cargo.lock b/jolt-eval/fuzz/Cargo.lock index 64582eecf..7747142bb 100644 --- a/jolt-eval/fuzz/Cargo.lock +++ b/jolt-eval/fuzz/Cargo.lock @@ -36,6 +36,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "aho-corasick" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +dependencies = [ + "memchr", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -337,6 +346,12 @@ version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "cc" version = "1.2.58" @@ -742,6 +757,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "fixedbitset" version = "0.5.7" @@ -776,6 +797,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" +[[package]] +name = "fxhash" +version = 
"0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -981,14 +1011,14 @@ dependencies = [ "derive_more", "dory-pcs", "eyre", - "fixedbitset", + "fixedbitset 0.5.7", "itertools 0.14.0", "jolt-inlines-keccak256", "jolt-inlines-sha2", "jolt-optimizations", "memory-stats", "num", - "num-derive", + "num-derive 0.4.2", "num-traits", "postcard", "rand", @@ -1012,7 +1042,6 @@ version = "0.1.0" dependencies = [ "arbitrary", "ark-bn254", - "ark-serialize", "clap", "common", "enumset", @@ -1021,13 +1050,11 @@ dependencies = [ "jolt-eval-macros", "postcard", "rand", - "rayon", + "rust-code-analysis", "schemars", "serde", "serde_json", - "sysinfo", "tempfile", - "thiserror", "tracer", "tracing", "tracing-subscriber", @@ -1204,15 +1231,6 @@ dependencies = [ "simd-adler32", ] -[[package]] -name = "ntapi" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" -dependencies = [ - "winapi", -] - [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -1255,6 +1273,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-derive" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "num-derive" version = "0.4.2" @@ -1266,6 +1295,16 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "num-format" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" +dependencies = [ + "arrayvec", + "itoa", +] + [[package]] name = "num-integer" version = "0.1.46" @@ -1306,25 +1345,6 @@ 
dependencies = [ "autocfg", ] -[[package]] -name = "objc2-core-foundation" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" -dependencies = [ - "bitflags", -] - -[[package]] -name = "objc2-io-kit" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" -dependencies = [ - "libc", - "objc2-core-foundation", -] - [[package]] name = "object" version = "0.38.1" @@ -1366,6 +1386,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset 0.4.2", + "indexmap", +] + [[package]] name = "pin-project-lite" version = "0.2.17" @@ -1483,13 +1513,25 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick 1.1.4", + "memchr", + "regex-automata", + "regex-syntax", +] + [[package]] name = "regex-automata" version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ - "aho-corasick", + "aho-corasick 1.1.4", "memchr", "regex-syntax", ] @@ -1500,6 +1542,35 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "rust-code-analysis" +version = "0.0.24" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "92a0f85e044428a7b58538f95fa58a157d89d5bcc5b37df6e7024957e52bdc5a" +dependencies = [ + "aho-corasick 0.7.20", + "fxhash", + "lazy_static", + "num", + "num-derive 0.3.3", + "num-format", + "num-traits", + "petgraph", + "regex", + "serde", + "termcolor", + "tree-sitter", + "tree-sitter-ccomment", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-mozcpp", + "tree-sitter-mozjs", + "tree-sitter-preproc", + "tree-sitter-python", + "tree-sitter-rust", + "tree-sitter-typescript", +] + [[package]] name = "rustc-demangle" version = "0.1.27" @@ -1722,20 +1793,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "sysinfo" -version = "0.38.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ab6a2f8bfe508deb3c6406578252e491d299cbbf3bc0529ecc3313aee4a52f" -dependencies = [ - "libc", - "memchr", - "ntapi", - "objc2-core-foundation", - "objc2-io-kit", - "windows", -] - [[package]] name = "tempfile" version = "3.27.0" @@ -1749,6 +1806,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "thiserror" version = "2.0.18" @@ -1874,6 +1940,118 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "tree-sitter" +version = "0.19.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f41201fed3db3b520405a9c01c61773a250d4c3f43e9861c14b2bb232c981ab" +dependencies = [ + "cc", + "regex", +] + +[[package]] +name = "tree-sitter-ccomment" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3b402bc539927bb457e5ab59aac7260e2c3b97c5fcfc043575788654eedd69a" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-cpp" +version = 
"0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7bd90c7b7db59369ed00fbc40458d9c9b2b8ed145640e337e839ac07aa63e15" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-java" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301ae2ee7813e1bf935dc06db947642400645bbea8878431e1b31131488d5430" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "840bb4d5f3c384cb76b976ff07297f5a24b6e61a708baa4464f53e395caaa5f9" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-mozcpp" +version = "0.19.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5439f32b7685af19efcd0165d28dab80261e1cc922ed259c9c7909c96ac4cc6" +dependencies = [ + "cc", + "tree-sitter", + "tree-sitter-cpp", +] + +[[package]] +name = "tree-sitter-mozjs" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "def6b21c10157d3d79b912191fa4549008885da827451a62be9f30abeb7319c8" +dependencies = [ + "cc", + "tree-sitter", + "tree-sitter-javascript", +] + +[[package]] +name = "tree-sitter-preproc" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "226b2a77578e83efa7a193919660ffc88c22e357f9c2d9f27b5b11898a8682d3" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-python" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5646bfe71c4eb1c21b714ce0c38334c311eab767095582859e85da6281e9fd6c" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784f7ef9cdbd4c895dc2d4bb785e95b4a5364a602eec803681db83d1927ddf15" 
+dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3f62d49c6e56bf291c412ee5e178ea14dff40f14a5f01a8847933f56d65bf3b" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "twox-hash" version = "2.1.2" @@ -2038,46 +2216,12 @@ dependencies = [ ] [[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" +name = "winapi-util" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-collections", - "windows-core", - "windows-future", - "windows-numerics", -] - -[[package]] -name = "windows-collections" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" -dependencies = [ - "windows-core", + "windows-sys 0.61.2", ] [[package]] @@ -2093,17 +2237,6 @@ dependencies = [ "windows-strings", ] -[[package]] -name = "windows-future" -version = "0.3.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" -dependencies = [ - "windows-core", - "windows-link", - "windows-threading", -] - [[package]] name = "windows-implement" version = "0.60.2" @@ -2132,16 +2265,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" -[[package]] -name = "windows-numerics" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" -dependencies = [ - "windows-core", - "windows-link", -] - [[package]] name = "windows-result" version = "0.4.1" @@ -2194,15 +2317,6 @@ dependencies = [ "windows_x86_64_msvc", ] -[[package]] -name = "windows-threading" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" -dependencies = [ - "windows-link", -] - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" diff --git a/jolt-eval/fuzz/Cargo.toml b/jolt-eval/fuzz/Cargo.toml index af96ee259..e4bdf4319 100644 --- a/jolt-eval/fuzz/Cargo.toml +++ b/jolt-eval/fuzz/Cargo.toml @@ -21,15 +21,16 @@ libfuzzer-sys = "0.4" jolt-eval = { path = ".." 
} [[bin]] -name = "split_eq_bind_low_high" -path = "fuzz_targets/split_eq_bind_low_high.rs" +name = "split_eq_bind_high_low" +path = "fuzz_targets/split_eq_bind_high_low.rs" test = false doc = false bench = false [[bin]] -name = "split_eq_bind_high_low" -path = "fuzz_targets/split_eq_bind_high_low.rs" +name = "split_eq_bind_low_high" +path = "fuzz_targets/split_eq_bind_low_high.rs" test = false doc = false bench = false + diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index 8bd6a1b77..80c3ef051 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -12,7 +12,7 @@ pub use guests::{GuestProgram, JoltDevice, ProofVerifyError}; pub use invariant::{ CheckError, Invariant, InvariantTargets, InvariantViolation, JoltInvariants, SynthesisTarget, }; -pub use objective::{AbstractObjective, Direction, MeasurementError, Objective}; +pub use objective::{AbstractObjective, Direction, MeasurementError, Objective, PerfObjective}; // Re-exports used by the #[invariant] proc macro generated code. pub use arbitrary; diff --git a/jolt-eval/src/objective/bind_bench.rs b/jolt-eval/src/objective/bind_bench.rs new file mode 100644 index 000000000..c66f95b94 --- /dev/null +++ b/jolt-eval/src/objective/bind_bench.rs @@ -0,0 +1,100 @@ +use ark_bn254::Fr; +use jolt_core::field::JoltField; +use jolt_core::poly::dense_mlpoly::DensePolynomial; +use jolt_core::poly::multilinear_polynomial::BindingOrder; + +use super::PerfObjective; + +type Challenge = ::Challenge; + +/// Number of variables for the benchmark polynomial (2^NUM_VARS evaluations). +const NUM_VARS: usize = 20; + +pub struct BindSetup { + /// Original evaluations (cloned into a fresh poly each iteration). + pub evals: Vec, + pub challenge: Challenge, +} + +/// Benchmark `DensePolynomial::bind_parallel` with `LowToHigh` binding. 
+#[derive(Default)] +pub struct BindLowToHighObjective; + +impl BindLowToHighObjective { + pub const NAME: &str = "bind_parallel_low_to_high"; +} + +impl PerfObjective for BindLowToHighObjective { + type Setup = BindSetup; + + fn name(&self) -> &str { + Self::NAME + } + + fn setup(&self) -> BindSetup { + let mut rng = rand::thread_rng(); + BindSetup { + evals: (0..1 << NUM_VARS) + .map(|_| Fr::random(&mut rng)) + .collect(), + challenge: Challenge::random(&mut rng), + } + } + + fn run(&self, setup: &mut BindSetup) { + let mut poly = DensePolynomial::new(setup.evals.clone()); + poly.bind_parallel(setup.challenge, BindingOrder::LowToHigh); + std::hint::black_box(poly); + } +} + +/// Benchmark `DensePolynomial::bind_parallel` with `HighToLow` binding. +#[derive(Default)] +pub struct BindHighToLowObjective; + +impl BindHighToLowObjective { + pub const NAME: &str = "bind_parallel_high_to_low"; +} + +impl PerfObjective for BindHighToLowObjective { + type Setup = BindSetup; + + fn name(&self) -> &str { + Self::NAME + } + + fn setup(&self) -> BindSetup { + let mut rng = rand::thread_rng(); + BindSetup { + evals: (0..1 << NUM_VARS) + .map(|_| Fr::random(&mut rng)) + .collect(), + challenge: Challenge::random(&mut rng), + } + } + + fn run(&self, setup: &mut BindSetup) { + let mut poly = DensePolynomial::new(setup.evals.clone()); + poly.bind_parallel(setup.challenge, BindingOrder::HighToLow); + std::hint::black_box(poly); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn bind_low_to_high_runs() { + let obj = BindLowToHighObjective; + let mut setup = obj.setup(); + obj.run(&mut setup); + } + + #[test] + fn bind_high_to_low_runs() { + let obj = BindHighToLowObjective; + let mut setup = obj.setup(); + obj.run(&mut setup); + } +} diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 05411806b..9b426ec01 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -1,7 +1,9 @@ +pub mod bind_bench; pub mod 
cognitive; pub mod halstead_bugs; pub mod lloc; pub mod optimize; +pub mod synthesis; use std::fmt; use std::path::Path; @@ -42,7 +44,27 @@ pub trait AbstractObjective: Send + Sync { fn direction(&self) -> Direction; } -/// Centralized objective enum dispatching to concrete implementations. +/// A performance objective suitable for Criterion benchmarking. +/// +/// Separates setup (run once) from the hot path (run many times in +/// Criterion's `b.iter()` loop). Use the `bench_objective!` macro to +/// generate a Criterion benchmark harness from a `PerfObjective`. +pub trait PerfObjective: Default + Send + Sync { + type Setup: Send; + + fn name(&self) -> &str; + + /// One-time setup (e.g. allocate polynomial, generate challenges). + fn setup(&self) -> Self::Setup; + + /// The hot path to benchmark. Called repeatedly by Criterion. + fn run(&self, setup: &mut Self::Setup); +} + +/// Centralized enum for static-analysis objectives. +/// +/// Performance objectives are handled separately via Criterion benchmarks +/// (see `PerfObjective` and `bench_objective!`). pub enum Objective { Lloc(lloc::LlocObjective), CognitiveComplexity(cognitive::CognitiveComplexityObjective), @@ -83,6 +105,14 @@ impl Objective { } } +/// Names of all registered `PerfObjective` benchmarks. +pub fn perf_objective_names() -> &'static [&'static str] { + &[ + bind_bench::BindLowToHighObjective::NAME, + bind_bench::BindHighToLowObjective::NAME, + ] +} + /// Record of a single optimization attempt for post-hoc analysis. pub struct OptimizationAttempt { pub description: String, diff --git a/jolt-eval/src/objective/synthesis.rs b/jolt-eval/src/objective/synthesis.rs new file mode 100644 index 000000000..5c8c07341 --- /dev/null +++ b/jolt-eval/src/objective/synthesis.rs @@ -0,0 +1,29 @@ +/// Macro that generates a Criterion benchmark harness for a `PerfObjective`. +/// +/// Takes a concrete `PerfObjective` type.
Setup is performed once; +/// Criterion calls `run()` repeatedly with statistical rigor. +/// +/// # Usage +/// +/// ```ignore +/// // benches/bind_parallel_low_to_high.rs +/// use jolt_eval::objective::bind_bench::BindLowToHighObjective; +/// jolt_eval::bench_objective!(BindLowToHighObjective); +/// ``` +#[macro_export] +macro_rules! bench_objective { + ($obj_ty:ty) => { + use $crate::PerfObjective as _; + + fn __bench(c: &mut ::criterion::Criterion) { + let obj = <$obj_ty>::default(); + let mut setup = obj.setup(); + c.bench_function(obj.name(), |b| { + b.iter(|| obj.run(&mut setup)); + }); + } + + ::criterion::criterion_group!(benches, __bench); + ::criterion::criterion_main!(benches); + }; +} diff --git a/jolt-eval/sync_targets.sh b/jolt-eval/sync_targets.sh new file mode 100755 index 000000000..79ed96d1d --- /dev/null +++ b/jolt-eval/sync_targets.sh @@ -0,0 +1,172 @@ +#!/usr/bin/env bash +# +# Synchronizes fuzz targets and Criterion benchmarks with the invariant +# and objective definitions in jolt-eval source code. +# +# Run from the repo root: +# ./jolt-eval/sync_targets.sh +# +# Idempotent: running twice produces no changes. 
+ +set -euo pipefail + +EVAL_DIR="$(cd "$(dirname "$0")" && pwd)" +FUZZ_DIR="$EVAL_DIR/fuzz" +BENCH_DIR="$EVAL_DIR/benches" + +# ── Helpers ────────────────────────────────────────────────────────── + +# Convert CamelCase to snake_case, stripping Invariant/Objective suffix +to_snake() { + echo "$1" \ + | sed 's/Invariant$//' \ + | sed 's/Objective$//' \ + | sed 's/\([A-Z]\)/_\1/g' \ + | sed 's/^_//' \ + | tr '[:upper:]' '[:lower:]' +} + +# ── Fuzz targets ───────────────────────────────────────────────────── + +echo "=== Syncing fuzz targets ===" + +mkdir -p "$FUZZ_DIR/fuzz_targets" + +# Find (snake_name, module_path, struct_name) for each fuzzable invariant +fuzz_entries="" +for file in "$EVAL_DIR"/src/invariant/*.rs; do + [ -f "$file" ] || continue + basename_rs=$(basename "$file" .rs) + [ "$basename_rs" = "mod" ] && continue + + # Look for #[invariant(...Fuzz...)] annotations + { grep -n 'invariant.*Fuzz' "$file" 2>/dev/null || true; } | while IFS=: read -r line _; do + struct=$(sed -n "$((line+1)),$((line+5))p" "$file" \ + | grep -o 'pub struct [A-Za-z_]*' | head -1 | awk '{print $3}') + [ -z "$struct" ] && continue + snake=$(to_snake "$struct") + echo "$snake invariant::${basename_rs}::${struct}" + done +done | sort -u > /tmp/jolt_fuzz_entries + +# Generate missing fuzz target files +while read -r snake mod_struct; do + [ -z "$snake" ] && continue + struct="${mod_struct##*::}" + target_file="$FUZZ_DIR/fuzz_targets/${snake}.rs" + if [ ! 
-f "$target_file" ]; then + echo " Creating fuzz target: $snake" + cat > "$target_file" </dev/null; then + echo " Removing stale fuzz target: $base" + rm "$f" + fi +done + +# Regenerate fuzz/Cargo.toml [[bin]] entries +{ + sed '/^\[\[bin\]\]/,$d' "$FUZZ_DIR/Cargo.toml" + while read -r snake _; do + [ -z "$snake" ] && continue + cat < "$FUZZ_DIR/Cargo.toml.tmp" +mv "$FUZZ_DIR/Cargo.toml.tmp" "$FUZZ_DIR/Cargo.toml" + +# ── Criterion benchmarks ───────────────────────────────────────────── + +echo "=== Syncing Criterion benchmarks ===" + +mkdir -p "$BENCH_DIR" + +# Find (bench_name, module_path, struct_name) for each PerfObjective +bench_entries="" +for file in "$EVAL_DIR"/src/objective/*.rs; do + [ -f "$file" ] || continue + basename_rs=$(basename "$file" .rs) + [ "$basename_rs" = "mod" ] && continue + + { grep -n 'impl PerfObjective for' "$file" 2>/dev/null || true; } | while IFS=: read -r _ rest; do + struct=$(echo "$rest" | grep -o 'for [A-Za-z_]*' | awk '{print $2}') + [ -z "$struct" ] && continue + # Try to find the NAME const + bench_name=$(grep -A5 "impl $struct" "$file" \ + | grep 'const NAME' | head -1 \ + | grep -o '"[^"]*"' | tr -d '"') || true + [ -z "$bench_name" ] && bench_name=$(to_snake "$struct" | sed 's/_objective$//') + echo "$bench_name objective::${basename_rs}::${struct}" + done +done | sort -u > /tmp/jolt_bench_entries + +# Generate missing bench files +while read -r name mod_struct; do + [ -z "$name" ] && continue + struct="${mod_struct##*::}" + bench_file="$BENCH_DIR/${name}.rs" + if [ ! 
-f "$bench_file" ]; then + echo " Creating benchmark: $name" + cat > "$bench_file" </dev/null; then + echo " Removing stale benchmark: $base" + rm "$f" + fi +done + +# Update Cargo.toml [[bench]] entries +CARGO_TOML="$EVAL_DIR/Cargo.toml" +tmpfile=$(mktemp) + +# Remove existing [[bench]] blocks +awk ' + /^\[\[bench\]\]/ { skip=1; next } + skip && /^$/ { skip=0; next } + skip && /^\[/ { skip=0 } + !skip { print } +' "$CARGO_TOML" > "$tmpfile" + +# Insert new [[bench]] entries before the first [[bin]] +{ + sed '/^\[\[bin\]\]/,$d' "$tmpfile" + while read -r name _; do + [ -z "$name" ] && continue + cat < "$CARGO_TOML" +rm -f "$tmpfile" /tmp/jolt_fuzz_entries /tmp/jolt_bench_entries + +echo "=== Done ===" From a8418a182a14f4912e6bb295670483c04521b165 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 10:44:12 -0400 Subject: [PATCH 37/86] tinker with measure_objectives output --- jolt-eval/bin/measure_objectives.rs | 58 ++++++++++++++++------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs index 08bc9dbde..cbaa2b55e 100644 --- a/jolt-eval/bin/measure_objectives.rs +++ b/jolt-eval/bin/measure_objectives.rs @@ -23,31 +23,6 @@ fn main() -> eyre::Result<()> { let repo_root = std::env::current_dir()?; - println!("{:<35} {:>15} {:>10}", "Objective", "Value", "Direction"); - println!("{}", "-".repeat(62)); - - // Static-analysis objectives - let objectives = Objective::all(&repo_root); - for obj in &objectives { - if let Some(ref name) = cli.objective { - if obj.name() != name.as_str() { - continue; - } - } - match obj.collect_measurement() { - Ok(val) => { - let dir = match obj.direction() { - jolt_eval::Direction::Minimize => "min", - jolt_eval::Direction::Maximize => "max", - }; - println!("{:<35} {:>15.6} {:>10}", obj.name(), val, dir); - } - Err(e) => { - println!("{:<35} {:>15}", obj.name(), format!("ERROR: {e}")); - } - } - } - // Performance objectives (from 
Criterion) if !cli.no_bench { let perf_names = perf_objective_names(); @@ -64,6 +39,11 @@ fn main() -> eyre::Result<()> { match status { Ok(s) if s.success() => { + println!( + "\n\n{:<35} {:>15} {:>10}", + "Objective", "Value", "Direction" + ); + println!("{}", "-".repeat(62)); for &name in perf_names { if let Some(ref filter) = cli.objective { if name != filter.as_str() { @@ -85,6 +65,34 @@ fn main() -> eyre::Result<()> { } } } + } else { + println!( + "\n\n{:<35} {:>15} {:>10}", + "Objective", "Value", "Direction" + ); + println!("{}", "-".repeat(62)); + } + + // Static-analysis objectives + let objectives = Objective::all(&repo_root); + for obj in &objectives { + if let Some(ref name) = cli.objective { + if obj.name() != name.as_str() { + continue; + } + } + match obj.collect_measurement() { + Ok(val) => { + let dir = match obj.direction() { + jolt_eval::Direction::Minimize => "min", + jolt_eval::Direction::Maximize => "max", + }; + println!("{:<35} {:>15.6} {:>10}", obj.name(), val, dir); + } + Err(e) => { + println!("{:<35} {:>15}", obj.name(), format!("ERROR: {e}")); + } + } } Ok(()) From fdff1724ccba5d5c304d39e7a76ab2dd2a9c2029 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 10:53:32 -0400 Subject: [PATCH 38/86] feat(jolt-eval): add units() method to objective traits Add units() to AbstractObjective (default None), PerfObjective (default "s"), and Objective enum. LLOC reports "lines", perf objectives report "s", others show "-". measure-objectives table now includes a Units column. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/measure_objectives.rs | 33 +++++++++++++++++------------ jolt-eval/src/objective/lloc.rs | 4 ++++ jolt-eval/src/objective/mod.rs | 15 +++++++++++++ 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs index cbaa2b55e..490b0afd3 100644 --- a/jolt-eval/bin/measure_objectives.rs +++ b/jolt-eval/bin/measure_objectives.rs @@ -17,6 +17,18 @@ struct Cli { no_bench: bool, } +fn print_header() { + println!( + "{:<35} {:>15} {:>8} {:>10}", + "Objective", "Value", "Units", "Direction" + ); + println!("{}", "-".repeat(70)); +} + +fn print_row(name: &str, val: f64, units: &str, dir: &str) { + println!("{:<35} {:>15.6} {:>8} {:>10}", name, val, units, dir); +} + fn main() -> eyre::Result<()> { tracing_subscriber::fmt::init(); let cli = Cli::parse(); @@ -39,11 +51,8 @@ fn main() -> eyre::Result<()> { match status { Ok(s) if s.success() => { - println!( - "\n\n{:<35} {:>15} {:>10}", - "Objective", "Value", "Direction" - ); - println!("{}", "-".repeat(62)); + println!(); + print_header(); for &name in perf_names { if let Some(ref filter) = cli.objective { if name != filter.as_str() { @@ -51,9 +60,7 @@ fn main() -> eyre::Result<()> { } } match read_criterion_estimate(name) { - Some(secs) => { - println!("{:<35} {:>15.6} {:>10}", name, secs, "min"); - } + Some(secs) => print_row(name, secs, "s", "min"), None => { println!("{:<35} {:>15}", name, "NO DATA"); } @@ -66,11 +73,8 @@ fn main() -> eyre::Result<()> { } } } else { - println!( - "\n\n{:<35} {:>15} {:>10}", - "Objective", "Value", "Direction" - ); - println!("{}", "-".repeat(62)); + println!(); + print_header(); } // Static-analysis objectives @@ -87,7 +91,8 @@ fn main() -> eyre::Result<()> { jolt_eval::Direction::Minimize => "min", jolt_eval::Direction::Maximize => "max", }; - println!("{:<35} {:>15.6} {:>10}", obj.name(), val, dir); + let units = obj.units().unwrap_or("-"); + 
print_row(obj.name(), val, units, dir); } Err(e) => { println!("{:<35} {:>15}", obj.name(), format!("ERROR: {e}")); diff --git a/jolt-eval/src/objective/lloc.rs b/jolt-eval/src/objective/lloc.rs index 2d27d2a62..50f7b786f 100644 --- a/jolt-eval/src/objective/lloc.rs +++ b/jolt-eval/src/objective/lloc.rs @@ -37,6 +37,10 @@ impl AbstractObjective for LlocObjective { fn direction(&self) -> Direction { Direction::Minimize } + + fn units(&self) -> Option<&str> { + Some("lines") + } } pub(crate) fn rust_files(dir: &Path) -> Result, MeasurementError> { diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 9b426ec01..73b5da5f2 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -42,6 +42,9 @@ pub trait AbstractObjective: Send + Sync { fn name(&self) -> &str; fn collect_measurement(&self) -> Result; fn direction(&self) -> Direction; + fn units(&self) -> Option<&str> { + None + } } /// A performance objective suitable for Criterion benchmarking. @@ -59,6 +62,10 @@ pub trait PerfObjective: Default + Send + Sync { /// The hot path to benchmark. Called repeatedly by Criterion. fn run(&self, setup: &mut Self::Setup); + + fn units(&self) -> &str { + "s" + } } /// Centralized enum for static-analysis objectives. @@ -96,6 +103,14 @@ impl Objective { } } + pub fn units(&self) -> Option<&str> { + match self { + Self::Lloc(o) => o.units(), + Self::CognitiveComplexity(o) => o.units(), + Self::HalsteadBugs(o) => o.units(), + } + } + pub fn direction(&self) -> Direction { match self { Self::Lloc(o) => o.direction(), From 44ca2379c4fa738e16cdbc05c028e877c31237ef Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 12:04:16 -0400 Subject: [PATCH 39/86] feat(jolt-eval): use iter_batched and --save-baseline for benchmarks Switch bench_objective! macro from iter() to iter_batched() with BatchSize::LargeInput, so per-iteration setup (poly clone) is excluded from measurement. PerfObjective::run now takes owned Setup. 
bind_bench uses thread_local shared state to generate random evals once, then clones per iteration in setup(). measure-objectives uses --quick for fast feedback. optimize binary uses --save-baseline optimize to enable Criterion's change detection across iterations. Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/measure_objectives.rs | 2 +- jolt-eval/bin/optimize.rs | 74 ++++++++++++++++++++++++--- jolt-eval/src/objective/bind_bench.rs | 74 +++++++++++++++++---------- jolt-eval/src/objective/mod.rs | 8 +-- jolt-eval/src/objective/synthesis.rs | 11 ++-- 5 files changed, 127 insertions(+), 42 deletions(-) diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs index 490b0afd3..f8bd560b1 100644 --- a/jolt-eval/bin/measure_objectives.rs +++ b/jolt-eval/bin/measure_objectives.rs @@ -46,7 +46,7 @@ fn main() -> eyre::Result<()> { if run_bench { eprintln!("Running Criterion benchmarks..."); let status = std::process::Command::new("cargo") - .args(["bench", "-p", "jolt-eval"]) + .args(["bench", "-p", "jolt-eval", "--", "--quick"]) .status(); match status { diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 4db879e59..56c6977bd 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::path::Path; use std::process::Command; use clap::Parser; @@ -6,7 +7,7 @@ use clap::Parser; use jolt_eval::agent::ClaudeCodeAgent; use jolt_eval::invariant::JoltInvariants; use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; -use jolt_eval::objective::{Direction, Objective}; +use jolt_eval::objective::{perf_objective_names, Direction, Objective}; #[derive(Parser)] #[command(name = "optimize")] @@ -37,17 +38,46 @@ struct RealEnv { objectives: Vec, invariants: Vec, repo_dir: std::path::PathBuf, + /// Whether to include perf benchmarks in measurements. 
+ bench_perf: bool, } impl OptimizeEnv for RealEnv { fn measure(&mut self) -> HashMap { - self.objectives + let mut results: HashMap = self + .objectives .iter() .filter_map(|o| { let name = o.name().to_string(); o.collect_measurement().ok().map(|v| (name, v)) }) - .collect() + .collect(); + + if self.bench_perf { + // Run Criterion with --save-baseline to enable comparison + let status = Command::new("cargo") + .current_dir(&self.repo_dir) + .args([ + "bench", + "-p", + "jolt-eval", + "--", + "--quick", + "--save-baseline", + "optimize", + ]) + .status(); + + if matches!(status, Ok(s) if s.success()) { + for &name in perf_objective_names() { + if let Some(secs) = read_criterion_estimate(name) { + results.insert(name.to_string(), secs); + } + } + } + } + + results } fn check_invariants(&mut self) -> bool { @@ -58,10 +88,19 @@ impl OptimizeEnv for RealEnv { } fn directions(&self) -> HashMap { - self.objectives + let mut dirs: HashMap = self + .objectives .iter() .map(|o| (o.name().to_string(), o.direction())) - .collect() + .collect(); + + if self.bench_perf { + for &name in perf_objective_names() { + dirs.insert(name.to_string(), Direction::Minimize); + } + } + + dirs } fn apply_diff(&mut self, diff: &str) { @@ -101,6 +140,7 @@ fn main() -> eyre::Result<()> { let all_names: Vec = all_objectives .iter() .map(|o| o.name().to_string()) + .chain(perf_objective_names().iter().map(|s| s.to_string())) .collect(); let filter_names: Option> = cli @@ -108,6 +148,12 @@ fn main() -> eyre::Result<()> { .as_ref() .map(|s| s.split(',').map(|n| n.trim().to_string()).collect()); + let bench_perf = filter_names.as_ref().is_none_or(|names| { + perf_objective_names() + .iter() + .any(|p| names.contains(&p.to_string())) + }); + let objectives: Vec = if let Some(names) = &filter_names { all_objectives .into_iter() @@ -117,7 +163,7 @@ fn main() -> eyre::Result<()> { all_objectives }; - if objectives.is_empty() { + if objectives.is_empty() && !bench_perf { eprintln!( "No matching 
objectives. Available: {}", all_names.join(", ") @@ -131,6 +177,7 @@ fn main() -> eyre::Result<()> { objectives, invariants, repo_dir: repo_dir.clone(), + bench_perf, }; println!("=== Baseline measurements ==="); @@ -175,12 +222,23 @@ fn print_measurements( for name in names { let val = measurements .get(name) - .map(|v| format!("{v:.4}")) + .map(|v| format!("{v:.6}")) .unwrap_or_else(|| "N/A".to_string()); let dir = match directions[name] { Direction::Minimize => "min", Direction::Maximize => "max", }; - println!(" {:<30} {:>15} {:>6}", name, val, dir); + println!(" {:<35} {:>15} {:>6}", name, val, dir); } } + +fn read_criterion_estimate(bench_name: &str) -> Option { + let path = Path::new("target/criterion") + .join(bench_name) + .join("optimize") + .join("estimates.json"); + let data = std::fs::read_to_string(path).ok()?; + let json: serde_json::Value = serde_json::from_str(&data).ok()?; + let nanos = json.get("mean")?.get("point_estimate")?.as_f64()?; + Some(nanos / 1e9) +} diff --git a/jolt-eval/src/objective/bind_bench.rs b/jolt-eval/src/objective/bind_bench.rs index c66f95b94..d20949c2b 100644 --- a/jolt-eval/src/objective/bind_bench.rs +++ b/jolt-eval/src/objective/bind_bench.rs @@ -10,12 +10,37 @@ type Challenge = ::Challenge; /// Number of variables for the benchmark polynomial (2^NUM_VARS evaluations). const NUM_VARS: usize = 20; +/// Per-iteration state: a fresh polynomial and a challenge to bind with. pub struct BindSetup { - /// Original evaluations (cloned into a fresh poly each iteration). - pub evals: Vec, + pub poly: DensePolynomial, pub challenge: Challenge, } +/// Shared state used to produce per-iteration setups cheaply. 
+struct BindShared { + evals: Vec, + challenge: Challenge, +} + +impl BindShared { + fn new() -> Self { + let mut rng = rand::thread_rng(); + Self { + evals: (0..1 << NUM_VARS) + .map(|_| Fr::random(&mut rng)) + .collect(), + challenge: Challenge::random(&mut rng), + } + } + + fn make_setup(&self) -> BindSetup { + BindSetup { + poly: DensePolynomial::new(self.evals.clone()), + challenge: self.challenge, + } + } +} + /// Benchmark `DensePolynomial::bind_parallel` with `LowToHigh` binding. #[derive(Default)] pub struct BindLowToHighObjective; @@ -32,19 +57,18 @@ impl PerfObjective for BindLowToHighObjective { } fn setup(&self) -> BindSetup { - let mut rng = rand::thread_rng(); - BindSetup { - evals: (0..1 << NUM_VARS) - .map(|_| Fr::random(&mut rng)) - .collect(), - challenge: Challenge::random(&mut rng), + // Thread-local shared state so we only generate random evals once. + thread_local! { + static SHARED: BindShared = BindShared::new(); } + SHARED.with(|s| s.make_setup()) } - fn run(&self, setup: &mut BindSetup) { - let mut poly = DensePolynomial::new(setup.evals.clone()); - poly.bind_parallel(setup.challenge, BindingOrder::LowToHigh); - std::hint::black_box(poly); + fn run(&self, mut setup: BindSetup) { + setup + .poly + .bind_parallel(setup.challenge, BindingOrder::LowToHigh); + std::hint::black_box(&setup.poly); } } @@ -64,19 +88,17 @@ impl PerfObjective for BindHighToLowObjective { } fn setup(&self) -> BindSetup { - let mut rng = rand::thread_rng(); - BindSetup { - evals: (0..1 << NUM_VARS) - .map(|_| Fr::random(&mut rng)) - .collect(), - challenge: Challenge::random(&mut rng), + thread_local! 
{ + static SHARED: BindShared = BindShared::new(); } + SHARED.with(|s| s.make_setup()) } - fn run(&self, setup: &mut BindSetup) { - let mut poly = DensePolynomial::new(setup.evals.clone()); - poly.bind_parallel(setup.challenge, BindingOrder::HighToLow); - std::hint::black_box(poly); + fn run(&self, mut setup: BindSetup) { + setup + .poly + .bind_parallel(setup.challenge, BindingOrder::HighToLow); + std::hint::black_box(&setup.poly); } } @@ -87,14 +109,14 @@ mod tests { #[test] fn bind_low_to_high_runs() { let obj = BindLowToHighObjective; - let mut setup = obj.setup(); - obj.run(&mut setup); + let setup = obj.setup(); + obj.run(setup); } #[test] fn bind_high_to_low_runs() { let obj = BindHighToLowObjective; - let mut setup = obj.setup(); - obj.run(&mut setup); + let setup = obj.setup(); + obj.run(setup); } } diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 73b5da5f2..5309b5464 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -57,11 +57,13 @@ pub trait PerfObjective: Default + Send + Sync { fn name(&self) -> &str; - /// One-time setup (e.g. allocate polynomial, generate challenges). + /// Per-iteration setup (e.g. clone a polynomial). Called by + /// `iter_batched` before each measured iteration. fn setup(&self) -> Self::Setup; - /// The hot path to benchmark. Called repeatedly by Criterion. - fn run(&self, setup: &mut Self::Setup); + /// The hot path to benchmark. Takes owned setup so the clone cost + /// is excluded from measurement via `iter_batched`. + fn run(&self, setup: Self::Setup); fn units(&self) -> &str { "s" diff --git a/jolt-eval/src/objective/synthesis.rs b/jolt-eval/src/objective/synthesis.rs index 5c8c07341..34a78f66c 100644 --- a/jolt-eval/src/objective/synthesis.rs +++ b/jolt-eval/src/objective/synthesis.rs @@ -1,7 +1,7 @@ /// Macro that generates a Criterion benchmark harness for a `PerfObjective`. /// -/// Takes a concrete `PerfObjective` expression. 
Setup is performed once; -/// Criterion calls `run()` repeatedly with statistical rigor. +/// Uses `iter_batched` with `BatchSize::LargeInput` so that per-iteration +/// setup (e.g. polynomial clone) is excluded from the measurement. /// /// # Usage /// @@ -17,9 +17,12 @@ macro_rules! bench_objective { fn __bench(c: &mut ::criterion::Criterion) { let obj = <$obj_ty>::default(); - let mut setup = obj.setup(); c.bench_function(obj.name(), |b| { - b.iter(|| obj.run(&mut setup)); + b.iter_batched( + || obj.setup(), + |setup| obj.run(setup), + ::criterion::BatchSize::LargeInput, + ); }); } From b4d5e3d687fe55caf6dbbcfc1cd6916edf3b2f02 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 12:45:26 -0400 Subject: [PATCH 40/86] fix(jolt-eval): run Criterion benchmarks individually with --bench flag Pass --bench to cargo bench for each perf objective so that Criterion-specific args (--quick, --save-baseline) don't get forwarded to the built-in test harness which doesn't understand them. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/measure_objectives.rs | 46 ++++++++++++++++------------- jolt-eval/bin/optimize.rs | 33 +++++++++++---------- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs index f8bd560b1..80368ce92 100644 --- a/jolt-eval/bin/measure_objectives.rs +++ b/jolt-eval/bin/measure_objectives.rs @@ -45,31 +45,37 @@ fn main() -> eyre::Result<()> { if run_bench { eprintln!("Running Criterion benchmarks..."); - let status = std::process::Command::new("cargo") - .args(["bench", "-p", "jolt-eval", "--", "--quick"]) - .status(); + let mut any_succeeded = false; + for &name in perf_names { + if let Some(ref filter) = cli.objective { + if name != filter.as_str() { + continue; + } + } + let status = std::process::Command::new("cargo") + .args(["bench", "-p", "jolt-eval", "--bench", name, "--", "--quick"]) + .status(); + if matches!(status, Ok(s) if s.success()) { + any_succeeded = true; + } + } - match status { - Ok(s) if s.success() => { - println!(); - print_header(); - for &name in perf_names { - if let Some(ref filter) = cli.objective { - if name != filter.as_str() { - continue; - } + if any_succeeded { + println!(); + print_header(); + for &name in perf_names { + if let Some(ref filter) = cli.objective { + if name != filter.as_str() { + continue; } - match read_criterion_estimate(name) { - Some(secs) => print_row(name, secs, "s", "min"), - None => { - println!("{:<35} {:>15}", name, "NO DATA"); - } + } + match read_criterion_estimate(name) { + Some(secs) => print_row(name, secs, "s", "min"), + None => { + println!("{:<35} {:>15}", name, "NO DATA"); } } } - _ => { - eprintln!("cargo bench failed; skipping perf objectives"); - } } } } else { diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 56c6977bd..b1a1dc12f 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -54,22 +54,23 @@ impl OptimizeEnv 
for RealEnv { .collect(); if self.bench_perf { - // Run Criterion with --save-baseline to enable comparison - let status = Command::new("cargo") - .current_dir(&self.repo_dir) - .args([ - "bench", - "-p", - "jolt-eval", - "--", - "--quick", - "--save-baseline", - "optimize", - ]) - .status(); - - if matches!(status, Ok(s) if s.success()) { - for &name in perf_objective_names() { + for &name in perf_objective_names() { + let status = Command::new("cargo") + .current_dir(&self.repo_dir) + .args([ + "bench", + "-p", + "jolt-eval", + "--bench", + name, + "--", + "--quick", + "--save-baseline", + "optimize", + ]) + .status(); + + if matches!(status, Ok(s) if s.success()) { if let Some(secs) = read_criterion_estimate(name) { results.insert(name.to_string(), secs); } From 52c2d248a7cdf4f6c7daa1c3a86876d8d1f2635d Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 15:27:03 -0400 Subject: [PATCH 41/86] refactor(jolt-eval): add DiffScope, enrich_input, capture worktree diffs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add DiffScope enum (All, Include, Exclude) to AgentHarness::invoke and invoke_structured so callers explicitly control which files appear in the captured diff: - optimize: Exclude(["jolt-eval/"]) — agent can't modify eval framework - redteam: Include(["jolt-eval/guest-sandbox/"]) — only sandbox changes Add Invariant::enrich_input (default no-op) for post-processing deserialized input with the worktree diff. SoundnessInvariant overrides it to fill the patch field from the diff when the agent edited files directly instead of providing a patch in JSON. Extract capture_diff helper in claude.rs that translates DiffScope to git pathspecs. Update soundness description to reflect that the harness captures patches automatically. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/agent/claude.rs | 58 ++++++++++++++------ jolt-eval/src/agent/mock.rs | 9 ++- jolt-eval/src/agent/mod.rs | 32 ++++++++--- jolt-eval/src/invariant/mod.rs | 9 +++ jolt-eval/src/invariant/soundness.rs | 32 +++++++---- jolt-eval/src/invariant/synthesis/redteam.rs | 10 +++- jolt-eval/src/objective/optimize.rs | 5 +- jolt-eval/tests/agent_test.rs | 47 +++++++++------- 8 files changed, 139 insertions(+), 63 deletions(-) diff --git a/jolt-eval/src/agent/claude.rs b/jolt-eval/src/agent/claude.rs index 6bb032431..e60ea1cc3 100644 --- a/jolt-eval/src/agent/claude.rs +++ b/jolt-eval/src/agent/claude.rs @@ -1,7 +1,7 @@ use std::path::{Path, PathBuf}; use std::process::Command; -use super::{AgentError, AgentHarness, AgentResponse}; +use super::{AgentError, AgentHarness, AgentResponse, DiffScope}; /// Agent implementation that invokes the Claude Code CLI in an isolated /// git worktree. @@ -55,26 +55,18 @@ impl ClaudeCodeAgent { } impl AgentHarness for ClaudeCodeAgent { - fn invoke(&self, repo_dir: &Path, prompt: &str) -> Result { + fn invoke( + &self, + repo_dir: &Path, + prompt: &str, + diff_scope: &DiffScope, + ) -> Result { let worktree_dir = create_worktree(repo_dir)?; tracing::info!("Created worktree at {}", worktree_dir.display()); let result = self.run_cli(&worktree_dir, prompt, &[], true); - // Capture diff before cleanup - let diff = Command::new("git") - .current_dir(&worktree_dir) - .args(["diff", "HEAD"]) - .output() - .ok() - .and_then(|o| { - let s = String::from_utf8_lossy(&o.stdout).to_string(); - if s.trim().is_empty() { - None - } else { - Some(s) - } - }); + let diff = capture_diff(&worktree_dir, diff_scope); tracing::info!("Cleaning up worktree..."); remove_worktree(repo_dir, &worktree_dir); @@ -109,6 +101,7 @@ impl AgentHarness for ClaudeCodeAgent { repo_dir: &Path, prompt: &str, schema: &serde_json::Value, + diff_scope: &DiffScope, ) -> Result { let worktree_dir = create_worktree(repo_dir)?; 
tracing::info!("Created worktree at {}", worktree_dir.display()); @@ -123,6 +116,8 @@ impl AgentHarness for ClaudeCodeAgent { false, ); + let diff = capture_diff(&worktree_dir, diff_scope); + tracing::info!("Cleaning up worktree..."); remove_worktree(repo_dir, &worktree_dir); let _ = std::fs::remove_dir_all(&worktree_dir); @@ -177,8 +172,37 @@ impl AgentHarness for ClaudeCodeAgent { )); }; - Ok(AgentResponse { text, diff: None }) + Ok(AgentResponse { text, diff }) + } +} + +/// Capture a unified diff of changes in a worktree relative to HEAD, +/// filtered by the given [`DiffScope`]. +fn capture_diff(worktree_dir: &Path, scope: &DiffScope) -> Option { + let mut cmd = Command::new("git"); + cmd.current_dir(worktree_dir) + .args(["diff", "HEAD", "--"]); + match scope { + DiffScope::All => {} + DiffScope::Include(paths) => { + for p in paths { + cmd.arg(p); + } + } + DiffScope::Exclude(paths) => { + for p in paths { + cmd.arg(format!(":!{p}")); + } + } } + cmd.output().ok().and_then(|o| { + let s = String::from_utf8_lossy(&o.stdout).to_string(); + if s.trim().is_empty() { + None + } else { + Some(s) + } + }) } /// Create an isolated detached git worktree from `repo_dir`. diff --git a/jolt-eval/src/agent/mock.rs b/jolt-eval/src/agent/mock.rs index a814dcbfe..8fad2804a 100644 --- a/jolt-eval/src/agent/mock.rs +++ b/jolt-eval/src/agent/mock.rs @@ -1,6 +1,6 @@ use std::path::Path; -use super::{AgentError, AgentHarness, AgentResponse}; +use super::{AgentError, AgentHarness, AgentResponse, DiffScope}; /// A mock agent for testing. Returns pre-configured responses and records /// every prompt it receives. 
@@ -64,7 +64,12 @@ impl MockAgent { } impl AgentHarness for MockAgent { - fn invoke(&self, _repo_dir: &Path, prompt: &str) -> Result { + fn invoke( + &self, + _repo_dir: &Path, + prompt: &str, + _diff_scope: &DiffScope, + ) -> Result { self.prompts.lock().unwrap().push(prompt.to_string()); let mut responses = self.responses.lock().unwrap(); diff --git a/jolt-eval/src/agent/mod.rs b/jolt-eval/src/agent/mod.rs index ecb9ec663..19d370fb2 100644 --- a/jolt-eval/src/agent/mod.rs +++ b/jolt-eval/src/agent/mod.rs @@ -38,28 +38,42 @@ impl AgentError { } } +/// Git pathspec filter for controlling which files appear in the +/// captured diff after an agent run. +pub enum DiffScope { + /// Capture all changes. + All, + /// Only include changes under these paths. + Include(Vec), + /// Include everything except changes under these paths. + Exclude(Vec), +} + /// A coding agent that can analyze or modify a repository given a prompt. /// -/// Implementors are responsible for their own isolation strategy (worktrees, -/// containers, API calls, etc.). The `repo_dir` parameter indicates the -/// repository root so the agent can set up whatever sandbox it needs. +/// The `diff_scope` parameter controls which file changes are captured +/// in `AgentResponse::diff` after the agent finishes. pub trait AgentHarness: Send + Sync { - fn invoke(&self, repo_dir: &Path, prompt: &str) -> Result; + /// Invoke the agent with a prompt. The agent can read and modify + /// files in its worktree; changes matching `diff_scope` are captured. + fn invoke( + &self, + repo_dir: &Path, + prompt: &str, + diff_scope: &DiffScope, + ) -> Result; /// Invoke the agent with a JSON Schema constraint on the response. /// - /// Agents that support structured output (e.g. Claude Code with - /// `--output-format json --json-schema`) should override this to - /// guarantee the response conforms to `schema`. - /// /// The default falls back to [`invoke`](Self::invoke). 
fn invoke_structured( &self, repo_dir: &Path, prompt: &str, _schema: &serde_json::Value, + diff_scope: &DiffScope, ) -> Result { - self.invoke(repo_dir, prompt) + self.invoke(repo_dir, prompt, diff_scope) } } diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index 0be7098dd..56898f9f8 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -106,6 +106,15 @@ pub trait Invariant: Send + Sync { fn seed_corpus(&self) -> Vec { vec![] } + + /// Post-process a deserialized input with context from the agent run. + /// + /// Called by the red-team harness after deserializing the agent's JSON + /// counterexample. The `diff` is the unified diff captured from the + /// agent's worktree (if any files were modified). The default is a no-op. + fn enrich_input(&self, input: Self::Input, _diff: Option<&str>) -> Self::Input { + input + } } /// Declares which synthesis targets an invariant supports. diff --git a/jolt-eval/src/invariant/soundness.rs b/jolt-eval/src/invariant/soundness.rs index d73695299..3a1ab58b9 100644 --- a/jolt-eval/src/invariant/soundness.rs +++ b/jolt-eval/src/invariant/soundness.rs @@ -125,20 +125,21 @@ impl Invariant for SoundnessInvariant { A counterexample is a guest patch + input + dishonest (output, panic) \ claim that the verifier incorrectly accepts. \ For full context, read the invariant file: jolt-eval/src/invariant/soundness.rs \n\n\ - ### Guest sandbox\n\n\ + ## Guest sandbox\n\n\ The guest template is at `jolt-eval/guest-sandbox/`. 
It contains:\n\ - `Cargo.toml` — depends on `jolt-sdk`\n\ - `src/lib.rs` — the `#[jolt::provable]` function (main patch target)\n\ - `src/main.rs` — no_main entry point (rarely needs patching)\n\n\ - ### Producing a patch\n\n\ - To produce the `patch` field, modify files inside `jolt-eval/guest-sandbox/` \ - and run `git diff` **from the `jolt-eval/guest-sandbox/` directory**:\n\ - ```\n\ - cd jolt-eval/guest-sandbox && git diff\n\ - ```\n\ - The patch is applied with `git apply` from the same directory. \ - Hunks referencing paths with `..` are filtered out.\n\n\ - ### Limits\n\n\ + ## Producing a patch\n\n\ + Simply edit the files inside `jolt-eval/guest-sandbox/` directly. \ + The harness automatically captures your changes as a `git diff` \ + from the worktree before cleanup and uses it as the patch. \ + You do NOT need to put the patch in the JSON counterexample — \ + leave the `patch` field empty and the harness fills it in.\n\n\ + Alternatively, you can provide a patch explicitly in the JSON \ + `patch` field. If non-empty, it takes precedence over the \ + worktree diff. Hunks referencing paths with `..` are filtered out.\n\n\ + ## Limits\n\n\ Memory config: max_input_size <= {MAX_INPUT_SIZE}, \ max_output_size <= {MAX_OUTPUT_SIZE}, \ stack_size <= {MAX_STACK_SIZE}, heap_size <= {MAX_HEAP_SIZE}. \ @@ -227,6 +228,17 @@ impl Invariant for SoundnessInvariant { claimed_panic: false, }] } + + /// If the agent modified `guest-sandbox/` in its worktree, use that + /// diff as the patch (unless the agent already provided one in JSON). + fn enrich_input(&self, mut input: SoundnessInput, diff: Option<&str>) -> SoundnessInput { + if input.patch.trim().is_empty() { + if let Some(diff) = diff { + input.patch = diff.to_string(); + } + } + input + } } /// RAII guard that reverts a patch on drop via `git checkout`. 
diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index b2bb6d08b..9b4488691 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -1,7 +1,7 @@ use std::path::Path; use super::super::{CheckError, FailedAttempt, Invariant}; -use crate::agent::AgentHarness; +use crate::agent::{AgentHarness, DiffScope}; /// Result of a red-team session. pub enum RedTeamResult { @@ -63,7 +63,9 @@ pub fn auto_redteam( &failed_attempts, ); - let response = match agent.invoke_structured(repo_dir, &prompt, &envelope_schema) { + let diff_scope = DiffScope::Include(vec!["jolt-eval/guest-sandbox/".into()]); + let response = + match agent.invoke_structured(repo_dir, &prompt, &envelope_schema, &diff_scope) { Ok(r) => r, Err(e) => { tracing::warn!("Agent invocation failed: {e}"); @@ -109,6 +111,10 @@ pub fn auto_redteam( } }; + // Let the invariant fill in fields from the agent's worktree diff + // (e.g. SoundnessInvariant uses it to populate the patch field). + let input = invariant.enrich_input(input, response.diff.as_deref()); + match invariant.check(&setup, input) { Ok(()) => { failed_attempts.push(FailedAttempt { diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index 20e3e0786..ffc950743 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::path::Path; -use crate::agent::{truncate, AgentHarness}; +use crate::agent::{truncate, AgentHarness, DiffScope}; use crate::objective::{Direction, OptimizationAttempt}; /// Configuration for an optimization run. 
@@ -73,7 +73,8 @@ pub fn auto_optimize( for iteration in 0..config.num_iterations { let prompt = build_optimize_prompt(&directions, &best, &attempts, config.hint.as_deref()); - let response = match agent.invoke(repo_dir, &prompt) { + let diff_scope = DiffScope::Exclude(vec!["jolt-eval/".into()]); + let response = match agent.invoke(repo_dir, &prompt, &diff_scope) { Ok(r) => r, Err(e) => { tracing::info!("Agent error: {e}"); diff --git a/jolt-eval/tests/agent_test.rs b/jolt-eval/tests/agent_test.rs index 955d57505..a85ef418c 100644 --- a/jolt-eval/tests/agent_test.rs +++ b/jolt-eval/tests/agent_test.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::path::Path; use enumset::EnumSet; -use jolt_eval::agent::{AgentError, AgentHarness, AgentResponse, MockAgent}; +use jolt_eval::agent::{AgentError, AgentHarness, AgentResponse, DiffScope, MockAgent}; use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; use jolt_eval::invariant::{ CheckError, Invariant, InvariantTargets, InvariantViolation, SynthesisTarget, @@ -104,7 +104,7 @@ impl Invariant for FailsOnZeroInvariant { #[test] fn mock_always_ok_returns_text() { let agent = MockAgent::always_ok("hello world"); - let resp = agent.invoke(Path::new("/tmp"), "test prompt").unwrap(); + let resp = agent.invoke(Path::new("/tmp"), "test prompt", &DiffScope::All).unwrap(); assert_eq!(resp.text, "hello world"); assert!(resp.diff.is_none()); } @@ -112,16 +112,16 @@ fn mock_always_ok_returns_text() { #[test] fn mock_always_err_returns_error() { let agent = MockAgent::always_err("boom"); - let err = agent.invoke(Path::new("/tmp"), "test").unwrap_err(); + let err = agent.invoke(Path::new("/tmp"), "test", &DiffScope::All).unwrap_err(); assert_eq!(err.message, "boom"); } #[test] fn mock_records_prompts() { let agent = MockAgent::always_ok("ok"); - agent.invoke(Path::new("/tmp"), "prompt 1").unwrap(); - agent.invoke(Path::new("/tmp"), "prompt 2").unwrap(); - agent.invoke(Path::new("/tmp"), 
"prompt 3").unwrap(); + agent.invoke(Path::new("/tmp"), "prompt 1", &DiffScope::All).unwrap(); + agent.invoke(Path::new("/tmp"), "prompt 2", &DiffScope::All).unwrap(); + agent.invoke(Path::new("/tmp"), "prompt 3", &DiffScope::All).unwrap(); let prompts = agent.recorded_prompts(); assert_eq!(prompts.len(), 3); @@ -134,7 +134,7 @@ fn mock_records_prompts() { fn mock_always_ok_repeats_indefinitely() { let agent = MockAgent::always_ok("same"); for _ in 0..100 { - let resp = agent.invoke(Path::new("/tmp"), "x").unwrap(); + let resp = agent.invoke(Path::new("/tmp"), "x", &DiffScope::All).unwrap(); assert_eq!(resp.text, "same"); } } @@ -143,7 +143,7 @@ fn mock_always_ok_repeats_indefinitely() { fn mock_always_err_repeats_indefinitely() { let agent = MockAgent::always_err("fail"); for _ in 0..100 { - let err = agent.invoke(Path::new("/tmp"), "x").unwrap_err(); + let err = agent.invoke(Path::new("/tmp"), "x", &DiffScope::All).unwrap_err(); assert_eq!(err.message, "fail"); } } @@ -162,15 +162,15 @@ fn mock_from_responses_returns_in_order() { Err(AgentError::new("third fails")), ]); - let r1 = agent.invoke(Path::new("/tmp"), "a").unwrap(); + let r1 = agent.invoke(Path::new("/tmp"), "a", &DiffScope::All).unwrap(); assert_eq!(r1.text, "first"); assert!(r1.diff.is_none()); - let r2 = agent.invoke(Path::new("/tmp"), "b").unwrap(); + let r2 = agent.invoke(Path::new("/tmp"), "b", &DiffScope::All).unwrap(); assert_eq!(r2.text, "second"); assert_eq!(r2.diff.as_deref(), Some("diff")); - let r3 = agent.invoke(Path::new("/tmp"), "c").unwrap_err(); + let r3 = agent.invoke(Path::new("/tmp"), "c", &DiffScope::All).unwrap_err(); assert_eq!(r3.message, "third fails"); } @@ -187,11 +187,11 @@ fn mock_from_responses_last_entry_repeats() { }), ]); - agent.invoke(Path::new("/tmp"), "a").unwrap(); - let r2 = agent.invoke(Path::new("/tmp"), "b").unwrap(); + agent.invoke(Path::new("/tmp"), "a", &DiffScope::All).unwrap(); + let r2 = agent.invoke(Path::new("/tmp"), "b", &DiffScope::All).unwrap(); 
assert_eq!(r2.text, "last"); // After exhausting queue, last response repeats - let r3 = agent.invoke(Path::new("/tmp"), "c").unwrap(); + let r3 = agent.invoke(Path::new("/tmp"), "c", &DiffScope::All).unwrap(); assert_eq!(r3.text, "last"); } @@ -202,7 +202,7 @@ fn mock_with_diff() { diff: Some("--- a/foo\n+++ b/foo\n@@ ...\n-old\n+new".into()), })]); - let resp = agent.invoke(Path::new("/tmp"), "optimize").unwrap(); + let resp = agent.invoke(Path::new("/tmp"), "optimize", &DiffScope::All).unwrap(); assert!(resp.diff.is_some()); assert!(resp.diff.unwrap().contains("+new")); } @@ -486,7 +486,7 @@ fn redteam_mixed_agent_responses() { fn agent_harness_is_object_safe() { // Verify we can use AgentHarness as a trait object let agent: Box = Box::new(MockAgent::always_ok("hi")); - let resp = agent.invoke(Path::new("/tmp"), "hello").unwrap(); + let resp = agent.invoke(Path::new("/tmp"), "hello", &DiffScope::All).unwrap(); assert_eq!(resp.text, "hi"); } @@ -494,7 +494,7 @@ fn agent_harness_is_object_safe() { fn agent_harness_works_with_arc() { use std::sync::Arc; let agent: Arc = Arc::new(MockAgent::always_ok("shared")); - let resp = agent.invoke(Path::new("/tmp"), "test").unwrap(); + let resp = agent.invoke(Path::new("/tmp"), "test", &DiffScope::All).unwrap(); assert_eq!(resp.text, "shared"); } @@ -505,9 +505,14 @@ struct FirstSuccessHarness { } impl AgentHarness for FirstSuccessHarness { - fn invoke(&self, repo_dir: &Path, prompt: &str) -> Result { + fn invoke( + &self, + repo_dir: &Path, + prompt: &str, + diff_scope: &DiffScope, + ) -> Result { for agent in &self.agents { - if let Ok(resp) = agent.invoke(repo_dir, prompt) { + if let Ok(resp) = agent.invoke(repo_dir, prompt, diff_scope) { return Ok(resp); } } @@ -525,7 +530,7 @@ fn custom_multi_agent_harness() { ], }; - let resp = harness.invoke(Path::new("/tmp"), "test").unwrap(); + let resp = harness.invoke(Path::new("/tmp"), "test", &DiffScope::All).unwrap(); assert_eq!(resp.text, "agent 3 succeeded"); } @@ -538,7 
+543,7 @@ fn custom_multi_agent_all_fail() { ], }; - let err = harness.invoke(Path::new("/tmp"), "test").unwrap_err(); + let err = harness.invoke(Path::new("/tmp"), "test", &DiffScope::All).unwrap_err(); assert_eq!(err.message, "All agents failed"); } From 0959fca9ffefebffd2bd9da0e1ceef531889367e Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 16:06:21 -0400 Subject: [PATCH 42/86] feat(jolt-eval): add ProverTimeObjective with configurable guest programs Add PerfObjective for end-to-end prover time, parameterized by a GuestConfig trait (Fibonacci, Muldiv, Sha2). Setup compiles the guest, traces to determine trace length, and preprocesses. Each Criterion iteration calls prove. Extend bench_objective! macro with a config form that accepts Criterion BenchmarkGroup method calls inline: bench_objective!(expr, config: sample_size(10), sampling_mode(Flat)) The prover_time_fibonacci benchmark uses flat sampling with 10 samples and 30s measurement time to keep total benchmark duration reasonable. Simplify sync_targets.sh: bench files are hand-authored (they carry domain-specific config), so the script only syncs Cargo.toml [[bench]] entries from whatever .rs files exist in benches/. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/Cargo.toml | 4 + jolt-eval/benches/prover_time_fibonacci.rs | 9 + jolt-eval/src/objective/mod.rs | 1 + jolt-eval/src/objective/prover_time.rs | 187 +++++++++++++++++++++ jolt-eval/src/objective/synthesis.rs | 34 +++- jolt-eval/sync_targets.sh | 51 +----- 6 files changed, 242 insertions(+), 44 deletions(-) create mode 100644 jolt-eval/benches/prover_time_fibonacci.rs create mode 100644 jolt-eval/src/objective/prover_time.rs diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index d2495b1b2..df818d098 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -38,6 +38,10 @@ harness = false name = "bind_parallel_low_to_high" harness = false +[[bench]] +name = "prover_time_fibonacci" +harness = false + [[bin]] name = "measure-objectives" path = "bin/measure_objectives.rs" diff --git a/jolt-eval/benches/prover_time_fibonacci.rs b/jolt-eval/benches/prover_time_fibonacci.rs new file mode 100644 index 000000000..ae5de8059 --- /dev/null +++ b/jolt-eval/benches/prover_time_fibonacci.rs @@ -0,0 +1,9 @@ +use jolt_eval::objective::prover_time::{Fibonacci, ProverTimeObjective}; + +jolt_eval::bench_objective!( + ProverTimeObjective::new(Fibonacci(100)), + config: + sample_size(10), + sampling_mode(::criterion::SamplingMode::Flat), + measurement_time(std::time::Duration::from_secs(30)), +); diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 5309b5464..0f24fe8a8 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -3,6 +3,7 @@ pub mod cognitive; pub mod halstead_bugs; pub mod lloc; pub mod optimize; +pub mod prover_time; pub mod synthesis; use std::fmt; diff --git a/jolt-eval/src/objective/prover_time.rs b/jolt-eval/src/objective/prover_time.rs new file mode 100644 index 000000000..18a6c1747 --- /dev/null +++ b/jolt-eval/src/objective/prover_time.rs @@ -0,0 +1,187 @@ +use common::constants::{DEFAULT_MAX_TRUSTED_ADVICE_SIZE, 
DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE}; +use common::jolt_device::MemoryConfig; +use jolt_core::host::Program; + +use crate::guests; + +use super::PerfObjective; + +/// Trait for configuring which guest program to benchmark. +pub trait GuestConfig: Default + Send + Sync { + /// Cargo package name (e.g. "fibonacci-guest"). + fn package(&self) -> &str; + + fn memory_config(&self) -> MemoryConfig { + // Default memory config + MemoryConfig { + max_input_size: 4096, + max_output_size: 4096, + max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, + max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE, + stack_size: 4096, + heap_size: 32768, + program_size: None, + } + } + + /// Serialized program input (postcard-encoded). + fn input(&self) -> Vec; + + /// Display name for the benchmark. + fn bench_name(&self) -> String; +} + +/// Per-iteration state: everything needed to call `prove`. +pub struct ProverTimeSetup { + pub program: guests::GuestProgram, + pub prover_pp: guests::ProverPreprocessing, + pub input: Vec, +} + +/// Benchmarks end-to-end prover time for a guest program. +/// +/// Setup compiles the guest, traces to determine trace length, +/// and preprocesses. Each iteration calls `prove`. +#[derive(Default)] +pub struct ProverTimeObjective { + guest: G, +} + +impl ProverTimeObjective { + pub fn new(guest: G) -> Self { + Self { guest } + } +} + +impl PerfObjective for ProverTimeObjective { + type Setup = ProverTimeSetup; + + fn name(&self) -> &str { + // Leak a string so we can return &str from a computed name. + // This is fine — there are only a handful of objectives. 
+ let name = self.guest.bench_name(); + Box::leak(name.into_boxed_str()) + } + + fn setup(&self) -> ProverTimeSetup { + let mut mc = self.guest.memory_config(); + let input = self.guest.input(); + + // Compile + let target_dir = "/tmp/jolt-eval-bench-targets"; + let mut host_program = Program::new(self.guest.package()); + host_program.set_memory_config(mc); + host_program.build(target_dir); + let elf_bytes = host_program + .get_elf_contents() + .expect("guest ELF not found after build"); + + // Decode to get program_size, trace to get trace length + let (_bytecode, _memory_init, program_size, _e_entry) = + jolt_core::guest::program::decode(&elf_bytes); + mc.program_size = Some(program_size); + + let program = guests::GuestProgram::new(&elf_bytes, &mc); + let (_lazy_trace, trace, _memory, _io) = program.trace(&input, &[], &[]); + let max_trace_length = (trace.len() + 1).next_power_of_two(); + drop(trace); + + let prover_pp = guests::prover_preprocessing(&program, max_trace_length); + + ProverTimeSetup { + program, + prover_pp, + input, + } + } + + fn run(&self, setup: ProverTimeSetup) { + let (_proof, _io) = guests::prove(&setup.program, &setup.prover_pp, &setup.input); + std::hint::black_box(()); + } +} + +// ── Guest configurations ──────────────────────────────────────────── + +/// Fibonacci guest: computes fib(n). +pub struct Fibonacci(pub u32); + +impl Default for Fibonacci { + fn default() -> Self { + Self(100) + } +} + +impl GuestConfig for Fibonacci { + fn package(&self) -> &str { + "fibonacci-guest" + } + fn input(&self) -> Vec { + postcard::to_stdvec(&self.0).unwrap() + } + fn bench_name(&self) -> String { + format!("prover_time_fibonacci_{}", self.0) + } +} + +/// Muldiv guest: computes a * b / c. 
+pub struct Muldiv(pub u32, pub u32, pub u32); + +impl Default for Muldiv { + fn default() -> Self { + Self(12031293, 17, 92) + } +} + +impl GuestConfig for Muldiv { + fn package(&self) -> &str { + "muldiv-guest" + } + fn input(&self) -> Vec { + postcard::to_stdvec(&(self.0, self.1, self.2)).unwrap() + } + fn bench_name(&self) -> String { + "prover_time_muldiv".to_string() + } +} + +/// SHA-2 guest: computes sha256 of input bytes. +pub struct Sha2(pub Vec); + +impl Default for Sha2 { + fn default() -> Self { + Self(vec![5u8; 32]) + } +} + +impl GuestConfig for Sha2 { + fn package(&self) -> &str { + "sha2-guest" + } + fn input(&self) -> Vec { + postcard::to_stdvec(&self.0).unwrap() + } + fn bench_name(&self) -> String { + "prover_time_sha2".to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn fibonacci_config_serializes() { + let g = Fibonacci(100); + assert_eq!(g.package(), "fibonacci-guest"); + assert!(!g.input().is_empty()); + assert_eq!(g.bench_name(), "prover_time_fibonacci_100"); + } + + #[test] + fn muldiv_config_serializes() { + let g = Muldiv::default(); + assert_eq!(g.package(), "muldiv-guest"); + assert!(!g.input().is_empty()); + } +} diff --git a/jolt-eval/src/objective/synthesis.rs b/jolt-eval/src/objective/synthesis.rs index 34a78f66c..2b0680ead 100644 --- a/jolt-eval/src/objective/synthesis.rs +++ b/jolt-eval/src/objective/synthesis.rs @@ -6,12 +6,42 @@ /// # Usage /// /// ```ignore -/// // benches/bind_parallel_low_to_high.rs -/// use jolt_eval::objective::bind_bench::BindLowToHighObjective; +/// // Fast benchmark (default Criterion settings, type must impl Default): /// jolt_eval::bench_objective!(BindLowToHighObjective); +/// +/// // Slow benchmark with custom Criterion config: +/// jolt_eval::bench_objective!( +/// ProverTimeObjective::new(Fibonacci(100)), +/// config: sample_size(10), sampling_mode(Flat), measurement_time(30s) +/// ); /// ``` #[macro_export] macro_rules! 
bench_objective { + // Expression form with config methods + ($obj_expr:expr, config: $($method:ident($($arg:expr),*)),* $(,)?) => { + use $crate::PerfObjective as _; + + fn __bench(c: &mut ::criterion::Criterion) { + let obj = $obj_expr; + let mut group = c.benchmark_group(obj.name()); + $( + group.$method($($arg),*); + )* + group.bench_function("prove", |b| { + b.iter_batched( + || obj.setup(), + |setup| obj.run(setup), + ::criterion::BatchSize::LargeInput, + ); + }); + group.finish(); + } + + ::criterion::criterion_group!(benches, __bench); + ::criterion::criterion_main!(benches); + }; + + // Simple form: just a type (uses Default + default Criterion config) ($obj_ty:ty) => { use $crate::PerfObjective as _; diff --git a/jolt-eval/sync_targets.sh b/jolt-eval/sync_targets.sh index 79ed96d1d..fc2d1ba42 100755 --- a/jolt-eval/sync_targets.sh +++ b/jolt-eval/sync_targets.sh @@ -93,53 +93,20 @@ EOF mv "$FUZZ_DIR/Cargo.toml.tmp" "$FUZZ_DIR/Cargo.toml" # ── Criterion benchmarks ───────────────────────────────────────────── +# +# Bench files are hand-authored (they carry domain-specific config). +# This script only syncs Cargo.toml [[bench]] entries from whatever +# .rs files exist in benches/. 
-echo "=== Syncing Criterion benchmarks ===" +echo "=== Syncing Criterion bench entries ===" mkdir -p "$BENCH_DIR" -# Find (bench_name, module_path, struct_name) for each PerfObjective -bench_entries="" -for file in "$EVAL_DIR"/src/objective/*.rs; do - [ -f "$file" ] || continue - basename_rs=$(basename "$file" .rs) - [ "$basename_rs" = "mod" ] && continue - - { grep -n 'impl PerfObjective for' "$file" 2>/dev/null || true; } | while IFS=: read -r _ rest; do - struct=$(echo "$rest" | grep -o 'for [A-Za-z_]*' | awk '{print $2}') - [ -z "$struct" ] && continue - # Try to find the NAME const - bench_name=$(grep -A5 "impl $struct" "$file" \ - | grep 'const NAME' | head -1 \ - | grep -o '"[^"]*"' | tr -d '"') || true - [ -z "$bench_name" ] && bench_name=$(to_snake "$struct" | sed 's/_objective$//') - echo "$bench_name objective::${basename_rs}::${struct}" - done -done | sort -u > /tmp/jolt_bench_entries - -# Generate missing bench files -while read -r name mod_struct; do - [ -z "$name" ] && continue - struct="${mod_struct##*::}" - bench_file="$BENCH_DIR/${name}.rs" - if [ ! -f "$bench_file" ]; then - echo " Creating benchmark: $name" - cat > "$bench_file" </dev/null; then - echo " Removing stale benchmark: $base" - rm "$f" - fi -done + basename "$f" .rs +done | sort -u > /tmp/jolt_bench_entries # Update Cargo.toml [[bench]] entries CARGO_TOML="$EVAL_DIR/Cargo.toml" @@ -156,7 +123,7 @@ awk ' # Insert new [[bench]] entries before the first [[bin]] { sed '/^\[\[bin\]\]/,$d' "$tmpfile" - while read -r name _; do + while read -r name; do [ -z "$name" ] && continue cat < Date: Fri, 3 Apr 2026 16:18:20 -0400 Subject: [PATCH 43/86] refactor(jolt-eval): organize objectives into code_quality/ and performance/ Move lloc, cognitive, halstead_bugs into objective/code_quality/. Move bind_bench, prover_time into objective/performance/. Refactor GuestConfig trait and guest configs (Fibonacci, Sha2Chain, Secp256k1EcdsaVerify) into guests/mod.rs. 
Remove Muldiv and Sha2 guests, replacing them with sha2-chain (iterated hashing) and secp256k1-ecdsa-verify (signature verification) which exercise more realistic code paths. Co-Authored-By: Claude Opus 4.6 (1M context) --- .omc/project-memory.json | 581 ++++++++++++++++++ .../89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.json | 8 + ...89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.jsonl | 7 + .omc/state/hud-state.json | 6 + .omc/state/hud-stdin-cache.json | 1 + .omc/state/idle-notif-cooldown.json | 3 + .omc/state/last-tool-error.json | 7 + .../autopilot-state.json | 10 + .../benches/bind_parallel_high_to_low.rs | 2 +- .../benches/bind_parallel_low_to_high.rs | 2 +- jolt-eval/benches/prover_time_fibonacci.rs | 3 +- jolt-eval/src/guests/mod.rs | 147 ++++- jolt-eval/src/lib.rs | 2 +- .../objective/{ => code_quality}/cognitive.rs | 2 +- .../{ => code_quality}/halstead_bugs.rs | 2 +- .../src/objective/{ => code_quality}/lloc.rs | 2 +- jolt-eval/src/objective/code_quality/mod.rs | 3 + jolt-eval/src/objective/mod.rs | 25 +- .../objective/{ => performance}/bind_bench.rs | 2 +- jolt-eval/src/objective/performance/mod.rs | 2 + .../src/objective/performance/prover_time.rs | 89 +++ jolt-eval/src/objective/prover_time.rs | 187 ------ 22 files changed, 884 insertions(+), 209 deletions(-) create mode 100644 .omc/project-memory.json create mode 100644 .omc/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.json create mode 100644 .omc/state/agent-replay-89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.jsonl create mode 100644 .omc/state/hud-state.json create mode 100644 .omc/state/hud-stdin-cache.json create mode 100644 .omc/state/idle-notif-cooldown.json create mode 100644 .omc/state/last-tool-error.json create mode 100644 .omc/state/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c/autopilot-state.json rename jolt-eval/src/objective/{ => code_quality}/cognitive.rs (97%) rename jolt-eval/src/objective/{ => code_quality}/halstead_bugs.rs (96%) rename jolt-eval/src/objective/{ => code_quality}/lloc.rs (97%) create mode 
100644 jolt-eval/src/objective/code_quality/mod.rs rename jolt-eval/src/objective/{ => performance}/bind_bench.rs (98%) create mode 100644 jolt-eval/src/objective/performance/mod.rs create mode 100644 jolt-eval/src/objective/performance/prover_time.rs delete mode 100644 jolt-eval/src/objective/prover_time.rs diff --git a/.omc/project-memory.json b/.omc/project-memory.json new file mode 100644 index 000000000..e95d394e0 --- /dev/null +++ b/.omc/project-memory.json @@ -0,0 +1,581 @@ +{ + "version": "1.0.0", + "lastScanned": 1775233644719, + "projectRoot": "/Users/mzhu/code/jolt", + "techStack": { + "languages": [ + { + "name": "Rust", + "version": null, + "confidence": "high", + "markers": [ + "Cargo.toml" + ] + }, + { + "name": "C/C++", + "version": null, + "confidence": "high", + "markers": [ + "Makefile" + ] + } + ], + "frameworks": [], + "packageManager": "cargo", + "runtime": null + }, + "build": { + "buildCommand": "cargo build", + "testCommand": "cargo test", + "lintCommand": "cargo clippy", + "devCommand": "cargo run", + "scripts": {} + }, + "conventions": { + "namingStyle": null, + "importStyle": null, + "testPattern": null, + "fileOrganization": null + }, + "structure": { + "isMonorepo": false, + "workspaces": [], + "mainDirectories": [ + "examples", + "scripts", + "src", + "tests" + ], + "gitBranches": { + "defaultBranch": "master", + "branchingStrategy": null + } + }, + "customNotes": [], + "directoryMap": { + "agent-skills": { + "path": "agent-skills", + "purpose": null, + "fileCount": 0, + "lastAccessed": 1775233644693, + "keyFiles": [] + }, + "baseline flamegraphs": { + "path": "baseline flamegraphs", + "purpose": null, + "fileCount": 12, + "lastAccessed": 1775233644694, + "keyFiles": [ + "stage2_end_flamechart.svg", + "stage2_start_flamechart.svg", + "stage3_end_flamechart.svg", + "stage3_start_flamechart.svg", + "stage4_end_flamechart.svg" + ] + }, + "benchmark-runs": { + "path": "benchmark-runs", + "purpose": null, + "fileCount": 1, + 
"lastAccessed": 1775233644694, + "keyFiles": [ + "peak_memory_plot.html" + ] + }, + "book": { + "path": "book", + "purpose": null, + "fileCount": 4, + "lastAccessed": 1775233644694, + "keyFiles": [ + "README.md", + "book.toml" + ] + }, + "common": { + "path": "common", + "purpose": null, + "fileCount": 1, + "lastAccessed": 1775233644695, + "keyFiles": [ + "Cargo.toml" + ] + }, + "docker": { + "path": "docker", + "purpose": null, + "fileCount": 3, + "lastAccessed": 1775233644695, + "keyFiles": [ + "Dockerfile.spike", + "README.md", + "build.sh" + ] + }, + "examples": { + "path": "examples", + "purpose": "Example code", + "fileCount": 2, + "lastAccessed": 1775233644695, + "keyFiles": [ + "run_ci_benchmarks.sh" + ] + }, + "imgs": { + "path": "imgs", + "purpose": null, + "fileCount": 1, + "lastAccessed": 1775233644695, + "keyFiles": [ + "jolt_alpha.png" + ] + }, + "jolt-core": { + "path": "jolt-core", + "purpose": null, + "fileCount": 10, + "lastAccessed": 1775233644695, + "keyFiles": [ + "Cargo.toml", + "dory_srs_10_variables.srs", + "dory_srs_18_variables.srs", + "dory_srs_24_variables.srs", + "dory_srs_26_variables.srs" + ] + }, + "jolt-eval": { + "path": "jolt-eval", + "purpose": null, + "fileCount": 3, + "lastAccessed": 1775233644695, + "keyFiles": [ + "Cargo.toml", + "README.md", + "sync_targets.sh" + ] + }, + "jolt-inlines": { + "path": "jolt-inlines", + "purpose": null, + "fileCount": 0, + "lastAccessed": 1775233644696, + "keyFiles": [] + }, + "jolt-platform": { + "path": "jolt-platform", + "purpose": null, + "fileCount": 1, + "lastAccessed": 1775233644696, + "keyFiles": [ + "Cargo.toml" + ] + }, + "jolt-sdk": { + "path": "jolt-sdk", + "purpose": null, + "fileCount": 2, + "lastAccessed": 1775233644696, + "keyFiles": [ + "Cargo.toml" + ] + }, + "patches": { + "path": "patches", + "purpose": null, + "fileCount": 2, + "lastAccessed": 1775233644696, + "keyFiles": [ + "fix-c-extension-privilege-tests.patch", + "fix-isa-regex.patch" + ] + }, + "prebuilt-files": { + 
"path": "prebuilt-files", + "purpose": null, + "fileCount": 0, + "lastAccessed": 1775233644696, + "keyFiles": [] + }, + "scripts": { + "path": "scripts", + "purpose": "Build/utility scripts", + "fileCount": 9, + "lastAccessed": 1775233644696, + "keyFiles": [ + "apply-patches", + "benchmark_summary.py", + "bootstrap", + "jolt_benchmarks.sh", + "optimize_machine.sh" + ] + }, + "src": { + "path": "src", + "purpose": "Source code", + "fileCount": 4, + "lastAccessed": 1775233644696, + "keyFiles": [ + "build_wasm.rs", + "lib.rs", + "linker.ld.template", + "main.rs" + ] + }, + "target": { + "path": "target", + "purpose": null, + "fileCount": 3, + "lastAccessed": 1775233644696, + "keyFiles": [ + "CACHEDIR.TAG" + ] + }, + "tests": { + "path": "tests", + "purpose": "Test files", + "fileCount": 0, + "lastAccessed": 1775233644697, + "keyFiles": [] + }, + "third-party": { + "path": "third-party", + "purpose": null, + "fileCount": 0, + "lastAccessed": 1775233644697, + "keyFiles": [] + }, + "tracer": { + "path": "tracer", + "purpose": null, + "fileCount": 3, + "lastAccessed": 1775233644697, + "keyFiles": [ + "Cargo.toml", + "LICENSE", + "README.md" + ] + }, + "z3-verifier": { + "path": "z3-verifier", + "purpose": null, + "fileCount": 2, + "lastAccessed": 1775233644697, + "keyFiles": [ + "Cargo.toml", + "README.md" + ] + }, + "zklean-extractor": { + "path": "zklean-extractor", + "purpose": null, + "fileCount": 4, + "lastAccessed": 1775233644697, + "keyFiles": [ + "Cargo.toml", + "LICENSE", + "README.md", + "build.rs" + ] + }, + "book/src": { + "path": "book/src", + "purpose": "Source code", + "fileCount": 3, + "lastAccessed": 1775233644697, + "keyFiles": [ + "SUMMARY.md", + "intro.md" + ] + }, + "common/src": { + "path": "common/src", + "purpose": "Source code", + "fileCount": 4, + "lastAccessed": 1775233644698, + "keyFiles": [ + "attributes.rs", + "constants.rs", + "jolt_device.rs" + ] + }, + "jolt-eval/bin": { + "path": "jolt-eval/bin", + "purpose": "Executable scripts", + 
"fileCount": 3, + "lastAccessed": 1775233644698, + "keyFiles": [ + "measure_objectives.rs", + "optimize.rs", + "redteam.rs" + ] + }, + "jolt-eval/src": { + "path": "jolt-eval/src", + "purpose": "Source code", + "fileCount": 1, + "lastAccessed": 1775233644698, + "keyFiles": [ + "lib.rs" + ] + }, + "jolt-eval/tests": { + "path": "jolt-eval/tests", + "purpose": "Test files", + "fileCount": 3, + "lastAccessed": 1775233644698, + "keyFiles": [ + "agent_test.rs", + "integration.rs", + "macro_test.rs" + ] + }, + "jolt-platform/src": { + "path": "jolt-platform/src", + "purpose": "Source code", + "fileCount": 8, + "lastAccessed": 1775233644698, + "keyFiles": [ + "advice.rs", + "cycle_tracking.rs", + "exit.rs" + ] + }, + "jolt-sdk/src": { + "path": "jolt-sdk/src", + "purpose": "Source code", + "fileCount": 2, + "lastAccessed": 1775233644698, + "keyFiles": [ + "host_utils.rs", + "lib.rs" + ] + }, + "jolt-sdk/tests": { + "path": "jolt-sdk/tests", + "purpose": "Test files", + "fileCount": 3, + "lastAccessed": 1775233644699, + "keyFiles": [ + "README.md", + "gen-fixtures.sh", + "verifier_api.rs" + ] + }, + "tracer/src": { + "path": "tracer/src", + "purpose": "Source code", + "fileCount": 2, + "lastAccessed": 1775233644699, + "keyFiles": [ + "lib.rs", + "main.rs" + ] + }, + "z3-verifier/src": { + "path": "z3-verifier/src", + "purpose": "Source code", + "fileCount": 3, + "lastAccessed": 1775233644699, + "keyFiles": [ + "cpu_constraints.rs", + "lib.rs", + "virtual_sequences.rs" + ] + }, + "zklean-extractor/src": { + "path": "zklean-extractor/src", + "purpose": "Source code", + "fileCount": 10, + "lastAccessed": 1775233644699, + "keyFiles": [ + "constants.rs", + "instruction.rs", + "lean_tests.rs" + ] + } + }, + "hotPaths": [ + { + "path": "jolt-eval/src/agent/claude.rs", + "accessCount": 21, + "lastAccessed": 1775240914249, + "type": "file" + }, + { + "path": "jolt-eval/tests/agent_test.rs", + "accessCount": 9, + "lastAccessed": 1775241109679, + "type": "directory" + }, + { + 
"path": "jolt-eval/src/invariant/soundness.rs", + "accessCount": 7, + "lastAccessed": 1775240409793, + "type": "file" + }, + { + "path": "jolt-eval/src/invariant/synthesis/redteam.rs", + "accessCount": 7, + "lastAccessed": 1775240966753, + "type": "file" + }, + { + "path": "jolt-eval/src/objective/synthesis.rs", + "accessCount": 7, + "lastAccessed": 1775246445448, + "type": "file" + }, + { + "path": "jolt-eval/benches/prover_time_fibonacci.rs", + "accessCount": 7, + "lastAccessed": 1775246451584, + "type": "file" + }, + { + "path": "jolt-eval/src/invariant/mod.rs", + "accessCount": 4, + "lastAccessed": 1775240346261, + "type": "file" + }, + { + "path": "jolt-eval/src/agent/mod.rs", + "accessCount": 4, + "lastAccessed": 1775241053447, + "type": "file" + }, + { + "path": "jolt-eval/src", + "accessCount": 4, + "lastAccessed": 1775244964713, + "type": "directory" + }, + { + "path": "jolt-eval/sync_targets.sh", + "accessCount": 4, + "lastAccessed": 1775246732070, + "type": "file" + }, + { + "path": "jolt-eval/src/agent/mock.rs", + "accessCount": 3, + "lastAccessed": 1775240931907, + "type": "file" + }, + { + "path": "jolt-eval/src/objective/optimize.rs", + "accessCount": 2, + "lastAccessed": 1775240941569, + "type": "file" + }, + { + "path": "jolt-eval/Cargo.toml", + "accessCount": 2, + "lastAccessed": 1775245065190, + "type": "file" + }, + { + "path": "jolt-eval/src/objective/prover_time.rs", + "accessCount": 2, + "lastAccessed": 1775245117960, + "type": "file" + }, + { + "path": "jolt-eval/tests", + "accessCount": 1, + "lastAccessed": 1775240852235, + "type": "directory" + }, + { + "path": "jolt-eval/bin", + "accessCount": 1, + "lastAccessed": 1775240853001, + "type": "directory" + }, + { + "path": "jolt-eval/src/objective/bind_bench.rs", + "accessCount": 1, + "lastAccessed": 1775244945032, + "type": "file" + }, + { + "path": "jolt-eval/src/guests/mod.rs", + "accessCount": 1, + "lastAccessed": 1775244951427, + "type": "file" + }, + { + "path": 
"examples/fibonacci/guest/src", + "accessCount": 1, + "lastAccessed": 1775244971547, + "type": "directory" + }, + { + "path": "examples/muldiv/guest/src", + "accessCount": 1, + "lastAccessed": 1775244972439, + "type": "directory" + }, + { + "path": "examples/sha2-ex/guest/src", + "accessCount": 1, + "lastAccessed": 1775244973599, + "type": "directory" + }, + { + "path": "jolt-eval/src/objective/mod.rs", + "accessCount": 1, + "lastAccessed": 1775245034647, + "type": "file" + }, + { + "path": "examples/sha2-chain/guest/src", + "accessCount": 1, + "lastAccessed": 1775246992179, + "type": "directory" + }, + { + "path": "examples/secp256k1-ecdsa-verify/guest/src", + "accessCount": 1, + "lastAccessed": 1775246993084, + "type": "directory" + }, + { + "path": "examples/sha2-chain/guest/src/lib.rs", + "accessCount": 1, + "lastAccessed": 1775246993515, + "type": "file" + }, + { + "path": "examples/secp256k1-ecdsa-verify/guest/src/lib.rs", + "accessCount": 1, + "lastAccessed": 1775246994292, + "type": "file" + }, + { + "path": "examples/sha2-chain/guest/Cargo.toml", + "accessCount": 1, + "lastAccessed": 1775247000292, + "type": "file" + }, + { + "path": "examples/secp256k1-ecdsa-verify/guest/Cargo.toml", + "accessCount": 1, + "lastAccessed": 1775247001105, + "type": "file" + }, + { + "path": "examples/secp256k1-ecdsa-verify", + "accessCount": 1, + "lastAccessed": 1775247008932, + "type": "directory" + }, + { + "path": "examples/secp256k1-ecdsa-verify/src/main.rs", + "accessCount": 1, + "lastAccessed": 1775247022060, + "type": "file" + } + ], + "userDirectives": [] +} \ No newline at end of file diff --git a/.omc/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.json b/.omc/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.json new file mode 100644 index 000000000..93101ce1c --- /dev/null +++ b/.omc/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.json @@ -0,0 +1,8 @@ +{ + "session_id": "89b3a12b-e9ae-47c5-966d-f2fe2782fb2c", + "ended_at": "2026-04-03T16:27:08.623Z", + "reason": 
"prompt_input_exit", + "agents_spawned": 0, + "agents_completed": 0, + "modes_used": [] +} \ No newline at end of file diff --git a/.omc/state/agent-replay-89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.jsonl b/.omc/state/agent-replay-89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.jsonl new file mode 100644 index 000000000..a4a6feee0 --- /dev/null +++ b/.omc/state/agent-replay-89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.jsonl @@ -0,0 +1,7 @@ +{"t":0,"agent":"system","event":"skill_invoked","skill_name":"oh-my-claudecode:omc-setup"} +{"t":0,"agent":"system","event":"skill_invoked","skill_name":"oh-my-claudecode:hud"} +{"t":0,"agent":"system","event":"skill_invoked","skill_name":"oh-my-claudecode:mcp-setup"} +{"t":0,"agent":"system","event":"keyword_detected","keyword":"autopilot"} +{"t":0,"agent":"system","event":"mode_change","mode_from":"none","mode_to":"autopilot"} +{"t":0,"agent":"system","event":"keyword_detected","keyword":"deep-interview"} +{"t":0,"agent":"system","event":"keyword_detected","keyword":"deep-interview"} diff --git a/.omc/state/hud-state.json b/.omc/state/hud-state.json new file mode 100644 index 000000000..cfd5ade6d --- /dev/null +++ b/.omc/state/hud-state.json @@ -0,0 +1,6 @@ +{ + "timestamp": "2026-04-03T16:31:28.279Z", + "backgroundTasks": [], + "sessionStartTimestamp": "2026-04-03T14:48:03.210Z", + "sessionId": "89b3a12b-e9ae-47c5-966d-f2fe2782fb2c" +} \ No newline at end of file diff --git a/.omc/state/hud-stdin-cache.json b/.omc/state/hud-stdin-cache.json new file mode 100644 index 000000000..627f85d27 --- /dev/null +++ b/.omc/state/hud-stdin-cache.json @@ -0,0 +1 @@ +{"session_id":"89b3a12b-e9ae-47c5-966d-f2fe2782fb2c","transcript_path":"/Users/mzhu/.claude/projects/-Users-mzhu-code-jolt/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.jsonl","cwd":"/Users/mzhu/code/jolt/jolt-eval/src/objective","session_name":"criterion-iter-batched-baseline","model":{"id":"claude-opus-4-6[1m]","display_name":"Opus 4.6 (1M 
context)"},"workspace":{"current_dir":"/Users/mzhu/code/jolt/jolt-eval/src/objective","project_dir":"/Users/mzhu/code/jolt","added_dirs":[]},"version":"2.1.91","output_style":{"name":"default"},"cost":{"total_cost_usd":357.49602949999957,"total_duration_ms":97181144,"total_api_duration_ms":12085031,"total_lines_added":5334,"total_lines_removed":3109},"context_window":{"total_input_tokens":272581,"total_output_tokens":370026,"context_window_size":1000000,"current_usage":{"input_tokens":3,"output_tokens":1,"cache_creation_input_tokens":310,"cache_read_input_tokens":805136},"used_percentage":81,"remaining_percentage":19},"exceeds_200k_tokens":true} \ No newline at end of file diff --git a/.omc/state/idle-notif-cooldown.json b/.omc/state/idle-notif-cooldown.json new file mode 100644 index 000000000..e58d8c57e --- /dev/null +++ b/.omc/state/idle-notif-cooldown.json @@ -0,0 +1,3 @@ +{ + "lastSentAt": "2026-04-03T20:06:26.707Z" +} \ No newline at end of file diff --git a/.omc/state/last-tool-error.json b/.omc/state/last-tool-error.json new file mode 100644 index 000000000..719f7bc24 --- /dev/null +++ b/.omc/state/last-tool-error.json @@ -0,0 +1,7 @@ +{ + "tool_name": "Bash", + "tool_input_preview": "{\"command\":\"cargo clippy -p jolt-eval --message-format=short -q --all-targets -- -D warnings 2>&1\",\"timeout\":120000,\"description\":\"Clippy check\"}", + "error": "Exit code 101\njolt-eval/benches/prover_time_fibonacci.rs:5:14: error[E0282]: type annotations needed\nerror: could not compile `jolt-eval` (bench \"prover_time_fibonacci\") due to 1 previous error", + "timestamp": "2026-04-03T19:41:01.456Z", + "retry_count": 5 +} \ No newline at end of file diff --git a/.omc/state/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c/autopilot-state.json b/.omc/state/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c/autopilot-state.json new file mode 100644 index 000000000..df473fc42 --- /dev/null +++ b/.omc/state/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c/autopilot-state.json @@ -0,0 
+1,10 @@ +{ + "active": true, + "started_at": "2026-04-03T18:18:10.711Z", + "original_prompt": "autopilot: Update the @jolt-eval/src/invariant/soundness.rs description to reflect that the patch is now handled by the harness. Also, injecting the diff into the JSON is not the cleanest solution, try to come up with something better.", + "session_id": "89b3a12b-e9ae-47c5-966d-f2fe2782fb2c", + "project_path": "/Users/mzhu/code/jolt", + "reinforcement_count": 0, + "awaiting_confirmation": true, + "last_checked_at": "2026-04-03T18:18:10.711Z" +} \ No newline at end of file diff --git a/jolt-eval/benches/bind_parallel_high_to_low.rs b/jolt-eval/benches/bind_parallel_high_to_low.rs index d3f0bb55d..ffb2343e8 100644 --- a/jolt-eval/benches/bind_parallel_high_to_low.rs +++ b/jolt-eval/benches/bind_parallel_high_to_low.rs @@ -1,2 +1,2 @@ -use jolt_eval::objective::bind_bench::BindHighToLowObjective; +use jolt_eval::objective::performance::bind_bench::BindHighToLowObjective; jolt_eval::bench_objective!(BindHighToLowObjective); diff --git a/jolt-eval/benches/bind_parallel_low_to_high.rs b/jolt-eval/benches/bind_parallel_low_to_high.rs index 5c640d6d2..3b621673e 100644 --- a/jolt-eval/benches/bind_parallel_low_to_high.rs +++ b/jolt-eval/benches/bind_parallel_low_to_high.rs @@ -1,2 +1,2 @@ -use jolt_eval::objective::bind_bench::BindLowToHighObjective; +use jolt_eval::objective::performance::bind_bench::BindLowToHighObjective; jolt_eval::bench_objective!(BindLowToHighObjective); diff --git a/jolt-eval/benches/prover_time_fibonacci.rs b/jolt-eval/benches/prover_time_fibonacci.rs index ae5de8059..bbc999bcd 100644 --- a/jolt-eval/benches/prover_time_fibonacci.rs +++ b/jolt-eval/benches/prover_time_fibonacci.rs @@ -1,4 +1,5 @@ -use jolt_eval::objective::prover_time::{Fibonacci, ProverTimeObjective}; +use jolt_eval::guests::Fibonacci; +use jolt_eval::objective::performance::prover_time::ProverTimeObjective; jolt_eval::bench_objective!( ProverTimeObjective::new(Fibonacci(100)), diff 
--git a/jolt-eval/src/guests/mod.rs b/jolt-eval/src/guests/mod.rs index 336e0e188..9437d3b3d 100644 --- a/jolt-eval/src/guests/mod.rs +++ b/jolt-eval/src/guests/mod.rs @@ -3,6 +3,9 @@ use jolt_core::curve::Bn254Curve; use jolt_core::poly::commitment::dory::DoryCommitmentScheme; use jolt_core::transcripts::Blake2bTranscript; +use common::constants::{DEFAULT_MAX_TRUSTED_ADVICE_SIZE, DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE}; +use common::jolt_device::MemoryConfig; + pub use jolt_core::guest::program::Program as GuestProgram; pub use jolt_core::utils::errors::ProofVerifyError; pub use tracer::JoltDevice; @@ -69,7 +72,6 @@ pub fn verify_with_claims( claimed_outputs: &[u8], claimed_panic: bool, ) -> Result<(), ProofVerifyError> { - use common::jolt_device::MemoryConfig; use jolt_core::zkvm::verifier::JoltVerifier; let memory_layout = &verifier_pp.shared.memory_layout; @@ -90,3 +92,146 @@ pub fn verify_with_claims( let verifier = JoltVerifier::::new(verifier_pp, proof, io_device, None, None)?; verifier.verify() } + +// ── GuestConfig ───────────────────────────────────────────────────── + +/// Trait for configuring which guest program to benchmark. +pub trait GuestConfig: Default + Send + Sync { + /// Cargo package name (e.g. "fibonacci-guest"). + fn package(&self) -> &str; + + fn memory_config(&self) -> MemoryConfig { + MemoryConfig { + max_input_size: 4096, + max_output_size: 4096, + max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, + max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE, + stack_size: 4096, + heap_size: 32768, + program_size: None, + } + } + + /// Serialized program input (postcard-encoded). + fn input(&self) -> Vec; + + /// Display name for the benchmark. + fn bench_name(&self) -> String; +} + +// ── Concrete guest configurations ─────────────────────────────────── + +/// Fibonacci guest: computes fib(n). 
+pub struct Fibonacci(pub u32); + +impl Default for Fibonacci { + fn default() -> Self { + Self(100) + } +} + +impl GuestConfig for Fibonacci { + fn package(&self) -> &str { + "fibonacci-guest" + } + fn input(&self) -> Vec { + postcard::to_stdvec(&self.0).unwrap() + } + fn bench_name(&self) -> String { + format!("prover_time_fibonacci_{}", self.0) + } +} + +/// SHA-2 chain guest: iteratively hashes input `num_iters` times. +pub struct Sha2Chain { + pub input: [u8; 32], + pub num_iters: u32, +} + +impl Default for Sha2Chain { + fn default() -> Self { + Self { + input: [5u8; 32], + num_iters: 100, + } + } +} + +impl GuestConfig for Sha2Chain { + fn package(&self) -> &str { + "sha2-chain-guest" + } + fn input(&self) -> Vec { + postcard::to_stdvec(&(self.input, self.num_iters)).unwrap() + } + fn bench_name(&self) -> String { + format!("prover_time_sha2_chain_{}", self.num_iters) + } +} + +/// Secp256k1 ECDSA signature verification guest. +pub struct Secp256k1EcdsaVerify { + pub z: [u64; 4], + pub r: [u64; 4], + pub s: [u64; 4], + pub q: [u64; 8], +} + +impl Default for Secp256k1EcdsaVerify { + fn default() -> Self { + // Test vector from examples/secp256k1-ecdsa-verify: "hello world" + Self { + z: [ + 0x9088f7ace2efcde9, + 0xc484efe37a5380ee, + 0xa52e52d7da7dabfa, + 0xb94d27b9934d3e08, + ], + r: [ + 0xb8fc413b4b967ed8, + 0x248d4b0b2829ab00, + 0x587f69296af3cd88, + 0x3a5d6a386e6cf7c0, + ], + s: [ + 0x66a82f274e3dcafc, + 0x299a02486be40321, + 0x6212d714118f617e, + 0x9d452f63cf91018d, + ], + q: [ + 0x0012563f32ed0216, + 0xee00716af6a73670, + 0x91fc70e34e00e6c8, + 0xeeb6be8b9e68868b, + 0x4780de3d5fda972d, + 0xcb1b42d72491e47f, + 0xdc7f31262e4ba2b7, + 0xdc7b004d3bb2800d, + ], + } + } +} + +impl GuestConfig for Secp256k1EcdsaVerify { + fn package(&self) -> &str { + "secp256k1-ecdsa-verify-guest" + } + fn memory_config(&self) -> MemoryConfig { + MemoryConfig { + max_input_size: 4096, + max_output_size: 4096, + max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, + 
max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE, + stack_size: 4096, + heap_size: 100000, + program_size: None, + } + } + fn input(&self) -> Vec { + postcard::to_stdvec(&(self.z, self.r, self.s, self.q)).unwrap() + } + fn bench_name(&self) -> String { + "prover_time_secp256k1_ecdsa_verify".to_string() + } +} diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index 80c3ef051..f3841d008 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -8,7 +8,7 @@ pub mod guests; pub mod invariant; pub mod objective; -pub use guests::{GuestProgram, JoltDevice, ProofVerifyError}; +pub use guests::{GuestConfig, GuestProgram, JoltDevice, ProofVerifyError}; pub use invariant::{ CheckError, Invariant, InvariantTargets, InvariantViolation, JoltInvariants, SynthesisTarget, }; diff --git a/jolt-eval/src/objective/cognitive.rs b/jolt-eval/src/objective/code_quality/cognitive.rs similarity index 97% rename from jolt-eval/src/objective/cognitive.rs rename to jolt-eval/src/objective/code_quality/cognitive.rs index 8d07f5ce8..8c4680798 100644 --- a/jolt-eval/src/objective/cognitive.rs +++ b/jolt-eval/src/objective/code_quality/cognitive.rs @@ -3,7 +3,7 @@ use std::path::{Path, PathBuf}; use rust_code_analysis::FuncSpace; use super::lloc::{analyze_rust_file, rust_files}; -use super::{AbstractObjective, Direction, MeasurementError}; +use crate::objective::{AbstractObjective, Direction, MeasurementError}; /// Average cognitive complexity per function across all Rust files under /// `jolt-core/src/`. 
diff --git a/jolt-eval/src/objective/halstead_bugs.rs b/jolt-eval/src/objective/code_quality/halstead_bugs.rs similarity index 96% rename from jolt-eval/src/objective/halstead_bugs.rs rename to jolt-eval/src/objective/code_quality/halstead_bugs.rs index 0b3b6ae60..379a4de33 100644 --- a/jolt-eval/src/objective/halstead_bugs.rs +++ b/jolt-eval/src/objective/code_quality/halstead_bugs.rs @@ -3,7 +3,7 @@ use std::path::{Path, PathBuf}; use rust_code_analysis::FuncSpace; use super::lloc::{analyze_rust_file, rust_files}; -use super::{AbstractObjective, Direction, MeasurementError}; +use crate::objective::{AbstractObjective, Direction, MeasurementError}; /// Estimated number of delivered bugs across all Rust files under /// `jolt-core/src/`, based on Halstead's bug prediction formula diff --git a/jolt-eval/src/objective/lloc.rs b/jolt-eval/src/objective/code_quality/lloc.rs similarity index 97% rename from jolt-eval/src/objective/lloc.rs rename to jolt-eval/src/objective/code_quality/lloc.rs index 50f7b786f..ce1d4718f 100644 --- a/jolt-eval/src/objective/lloc.rs +++ b/jolt-eval/src/objective/code_quality/lloc.rs @@ -2,7 +2,7 @@ use std::path::{Path, PathBuf}; use rust_code_analysis::{get_function_spaces, FuncSpace, LANG}; -use super::{AbstractObjective, Direction, MeasurementError}; +use crate::objective::{AbstractObjective, Direction, MeasurementError}; /// Total logical lines of code (LLOC) across all Rust files under /// `jolt-core/src/`. 
diff --git a/jolt-eval/src/objective/code_quality/mod.rs b/jolt-eval/src/objective/code_quality/mod.rs new file mode 100644 index 000000000..8acfb4929 --- /dev/null +++ b/jolt-eval/src/objective/code_quality/mod.rs @@ -0,0 +1,3 @@ +pub mod cognitive; +pub mod halstead_bugs; +pub mod lloc; diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 0f24fe8a8..2a1b1fb06 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -1,9 +1,6 @@ -pub mod bind_bench; -pub mod cognitive; -pub mod halstead_bugs; -pub mod lloc; +pub mod code_quality; pub mod optimize; -pub mod prover_time; +pub mod performance; pub mod synthesis; use std::fmt; @@ -76,17 +73,19 @@ pub trait PerfObjective: Default + Send + Sync { /// Performance objectives are handled separately via Criterion benchmarks /// (see `PerfObjective` and `bench_objective!`). pub enum Objective { - Lloc(lloc::LlocObjective), - CognitiveComplexity(cognitive::CognitiveComplexityObjective), - HalsteadBugs(halstead_bugs::HalsteadBugsObjective), + Lloc(code_quality::lloc::LlocObjective), + CognitiveComplexity(code_quality::cognitive::CognitiveComplexityObjective), + HalsteadBugs(code_quality::halstead_bugs::HalsteadBugsObjective), } impl Objective { pub fn all(root: &Path) -> Vec { vec![ - Self::Lloc(lloc::LlocObjective::new(root)), - Self::CognitiveComplexity(cognitive::CognitiveComplexityObjective::new(root)), - Self::HalsteadBugs(halstead_bugs::HalsteadBugsObjective::new(root)), + Self::Lloc(code_quality::lloc::LlocObjective::new(root)), + Self::CognitiveComplexity(code_quality::cognitive::CognitiveComplexityObjective::new( + root, + )), + Self::HalsteadBugs(code_quality::halstead_bugs::HalsteadBugsObjective::new(root)), ] } @@ -126,8 +125,8 @@ impl Objective { /// Names of all registered `PerfObjective` benchmarks. 
pub fn perf_objective_names() -> &'static [&'static str] { &[ - bind_bench::BindLowToHighObjective::NAME, - bind_bench::BindHighToLowObjective::NAME, + performance::bind_bench::BindLowToHighObjective::NAME, + performance::bind_bench::BindHighToLowObjective::NAME, ] } diff --git a/jolt-eval/src/objective/bind_bench.rs b/jolt-eval/src/objective/performance/bind_bench.rs similarity index 98% rename from jolt-eval/src/objective/bind_bench.rs rename to jolt-eval/src/objective/performance/bind_bench.rs index d20949c2b..9e9582392 100644 --- a/jolt-eval/src/objective/bind_bench.rs +++ b/jolt-eval/src/objective/performance/bind_bench.rs @@ -3,7 +3,7 @@ use jolt_core::field::JoltField; use jolt_core::poly::dense_mlpoly::DensePolynomial; use jolt_core::poly::multilinear_polynomial::BindingOrder; -use super::PerfObjective; +use crate::objective::PerfObjective; type Challenge = ::Challenge; diff --git a/jolt-eval/src/objective/performance/mod.rs b/jolt-eval/src/objective/performance/mod.rs new file mode 100644 index 000000000..9e5754c0f --- /dev/null +++ b/jolt-eval/src/objective/performance/mod.rs @@ -0,0 +1,2 @@ +pub mod bind_bench; +pub mod prover_time; diff --git a/jolt-eval/src/objective/performance/prover_time.rs b/jolt-eval/src/objective/performance/prover_time.rs new file mode 100644 index 000000000..5de06068e --- /dev/null +++ b/jolt-eval/src/objective/performance/prover_time.rs @@ -0,0 +1,89 @@ +use jolt_core::host::Program; + +use crate::guests::{self, GuestConfig, GuestProgram, ProverPreprocessing}; +use crate::objective::PerfObjective; + +/// Per-iteration state: everything needed to call `prove`. +pub struct ProverTimeSetup { + pub program: GuestProgram, + pub prover_pp: ProverPreprocessing, + pub input: Vec, +} + +/// Benchmarks end-to-end prover time for a guest program. +/// +/// Setup compiles the guest, traces to determine trace length, +/// and preprocesses. Each iteration calls `prove`. 
+#[derive(Default)] +pub struct ProverTimeObjective { + guest: G, +} + +impl ProverTimeObjective { + pub fn new(guest: G) -> Self { + Self { guest } + } +} + +impl PerfObjective for ProverTimeObjective { + type Setup = ProverTimeSetup; + + fn name(&self) -> &str { + // Leak a string so we can return &str from a computed name. + // This is fine — there are only a handful of objectives. + let name = self.guest.bench_name(); + Box::leak(name.into_boxed_str()) + } + + fn setup(&self) -> ProverTimeSetup { + let mut mc = self.guest.memory_config(); + let input = self.guest.input(); + + // Compile + let target_dir = "/tmp/jolt-eval-bench-targets"; + let mut host_program = Program::new(self.guest.package()); + host_program.set_memory_config(mc); + host_program.build(target_dir); + let elf_bytes = host_program + .get_elf_contents() + .expect("guest ELF not found after build"); + + // Decode to get program_size, trace to get trace length + let (_bytecode, _memory_init, program_size, _e_entry) = + jolt_core::guest::program::decode(&elf_bytes); + mc.program_size = Some(program_size); + + let program = GuestProgram::new(&elf_bytes, &mc); + let (_lazy_trace, trace, _memory, _io) = program.trace(&input, &[], &[]); + let max_trace_length = (trace.len() + 1).next_power_of_two(); + drop(trace); + + let prover_pp = guests::prover_preprocessing(&program, max_trace_length); + + ProverTimeSetup { + program, + prover_pp, + input, + } + } + + fn run(&self, setup: ProverTimeSetup) { + let (_proof, _io) = guests::prove(&setup.program, &setup.prover_pp, &setup.input); + std::hint::black_box(()); + } +} + +#[cfg(test)] +mod tests { + use crate::guests::Fibonacci; + + use super::*; + + #[test] + fn fibonacci_config() { + let g = Fibonacci(100); + assert_eq!(g.package(), "fibonacci-guest"); + assert!(!g.input().is_empty()); + assert_eq!(g.bench_name(), "prover_time_fibonacci_100"); + } +} diff --git a/jolt-eval/src/objective/prover_time.rs b/jolt-eval/src/objective/prover_time.rs deleted file 
mode 100644 index 18a6c1747..000000000 --- a/jolt-eval/src/objective/prover_time.rs +++ /dev/null @@ -1,187 +0,0 @@ -use common::constants::{DEFAULT_MAX_TRUSTED_ADVICE_SIZE, DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE}; -use common::jolt_device::MemoryConfig; -use jolt_core::host::Program; - -use crate::guests; - -use super::PerfObjective; - -/// Trait for configuring which guest program to benchmark. -pub trait GuestConfig: Default + Send + Sync { - /// Cargo package name (e.g. "fibonacci-guest"). - fn package(&self) -> &str; - - fn memory_config(&self) -> MemoryConfig { - // Default memory config - MemoryConfig { - max_input_size: 4096, - max_output_size: 4096, - max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, - max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE, - stack_size: 4096, - heap_size: 32768, - program_size: None, - } - } - - /// Serialized program input (postcard-encoded). - fn input(&self) -> Vec; - - /// Display name for the benchmark. - fn bench_name(&self) -> String; -} - -/// Per-iteration state: everything needed to call `prove`. -pub struct ProverTimeSetup { - pub program: guests::GuestProgram, - pub prover_pp: guests::ProverPreprocessing, - pub input: Vec, -} - -/// Benchmarks end-to-end prover time for a guest program. -/// -/// Setup compiles the guest, traces to determine trace length, -/// and preprocesses. Each iteration calls `prove`. -#[derive(Default)] -pub struct ProverTimeObjective { - guest: G, -} - -impl ProverTimeObjective { - pub fn new(guest: G) -> Self { - Self { guest } - } -} - -impl PerfObjective for ProverTimeObjective { - type Setup = ProverTimeSetup; - - fn name(&self) -> &str { - // Leak a string so we can return &str from a computed name. - // This is fine — there are only a handful of objectives. 
- let name = self.guest.bench_name(); - Box::leak(name.into_boxed_str()) - } - - fn setup(&self) -> ProverTimeSetup { - let mut mc = self.guest.memory_config(); - let input = self.guest.input(); - - // Compile - let target_dir = "/tmp/jolt-eval-bench-targets"; - let mut host_program = Program::new(self.guest.package()); - host_program.set_memory_config(mc); - host_program.build(target_dir); - let elf_bytes = host_program - .get_elf_contents() - .expect("guest ELF not found after build"); - - // Decode to get program_size, trace to get trace length - let (_bytecode, _memory_init, program_size, _e_entry) = - jolt_core::guest::program::decode(&elf_bytes); - mc.program_size = Some(program_size); - - let program = guests::GuestProgram::new(&elf_bytes, &mc); - let (_lazy_trace, trace, _memory, _io) = program.trace(&input, &[], &[]); - let max_trace_length = (trace.len() + 1).next_power_of_two(); - drop(trace); - - let prover_pp = guests::prover_preprocessing(&program, max_trace_length); - - ProverTimeSetup { - program, - prover_pp, - input, - } - } - - fn run(&self, setup: ProverTimeSetup) { - let (_proof, _io) = guests::prove(&setup.program, &setup.prover_pp, &setup.input); - std::hint::black_box(()); - } -} - -// ── Guest configurations ──────────────────────────────────────────── - -/// Fibonacci guest: computes fib(n). -pub struct Fibonacci(pub u32); - -impl Default for Fibonacci { - fn default() -> Self { - Self(100) - } -} - -impl GuestConfig for Fibonacci { - fn package(&self) -> &str { - "fibonacci-guest" - } - fn input(&self) -> Vec { - postcard::to_stdvec(&self.0).unwrap() - } - fn bench_name(&self) -> String { - format!("prover_time_fibonacci_{}", self.0) - } -} - -/// Muldiv guest: computes a * b / c. 
-pub struct Muldiv(pub u32, pub u32, pub u32); - -impl Default for Muldiv { - fn default() -> Self { - Self(12031293, 17, 92) - } -} - -impl GuestConfig for Muldiv { - fn package(&self) -> &str { - "muldiv-guest" - } - fn input(&self) -> Vec { - postcard::to_stdvec(&(self.0, self.1, self.2)).unwrap() - } - fn bench_name(&self) -> String { - "prover_time_muldiv".to_string() - } -} - -/// SHA-2 guest: computes sha256 of input bytes. -pub struct Sha2(pub Vec); - -impl Default for Sha2 { - fn default() -> Self { - Self(vec![5u8; 32]) - } -} - -impl GuestConfig for Sha2 { - fn package(&self) -> &str { - "sha2-guest" - } - fn input(&self) -> Vec { - postcard::to_stdvec(&self.0).unwrap() - } - fn bench_name(&self) -> String { - "prover_time_sha2".to_string() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn fibonacci_config_serializes() { - let g = Fibonacci(100); - assert_eq!(g.package(), "fibonacci-guest"); - assert!(!g.input().is_empty()); - assert_eq!(g.bench_name(), "prover_time_fibonacci_100"); - } - - #[test] - fn muldiv_config_serializes() { - let g = Muldiv::default(); - assert_eq!(g.package(), "muldiv-guest"); - assert!(!g.input().is_empty()); - } -} From cd829a63d609586a382adcb0ae87388de813f597 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 16:19:17 -0400 Subject: [PATCH 44/86] chore: gitignore .omc/ state files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9a830862b..9c06fb27f 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,4 @@ benchmark-runs/ /perfetto_traces/ *.pb *benchmark_results.json +/.omc/ From 374a6a4288fdfd7c471a75115c3f75326da6f522 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 16:19:34 -0400 Subject: [PATCH 45/86] chore: remove accidentally committed .omc/ state files --- .gitignore | 2 +- .omc/project-memory.json | 581 ------------------ .../89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.json | 8 - ...89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.jsonl | 7 
- .omc/state/hud-state.json | 6 - .omc/state/hud-stdin-cache.json | 1 - .omc/state/idle-notif-cooldown.json | 3 - .omc/state/last-tool-error.json | 7 - .../autopilot-state.json | 10 - 9 files changed, 1 insertion(+), 624 deletions(-) delete mode 100644 .omc/project-memory.json delete mode 100644 .omc/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.json delete mode 100644 .omc/state/agent-replay-89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.jsonl delete mode 100644 .omc/state/hud-state.json delete mode 100644 .omc/state/hud-stdin-cache.json delete mode 100644 .omc/state/idle-notif-cooldown.json delete mode 100644 .omc/state/last-tool-error.json delete mode 100644 .omc/state/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c/autopilot-state.json diff --git a/.gitignore b/.gitignore index 9c06fb27f..77ea8883c 100644 --- a/.gitignore +++ b/.gitignore @@ -57,4 +57,4 @@ benchmark-runs/ /perfetto_traces/ *.pb *benchmark_results.json -/.omc/ +.omc/ diff --git a/.omc/project-memory.json b/.omc/project-memory.json deleted file mode 100644 index e95d394e0..000000000 --- a/.omc/project-memory.json +++ /dev/null @@ -1,581 +0,0 @@ -{ - "version": "1.0.0", - "lastScanned": 1775233644719, - "projectRoot": "/Users/mzhu/code/jolt", - "techStack": { - "languages": [ - { - "name": "Rust", - "version": null, - "confidence": "high", - "markers": [ - "Cargo.toml" - ] - }, - { - "name": "C/C++", - "version": null, - "confidence": "high", - "markers": [ - "Makefile" - ] - } - ], - "frameworks": [], - "packageManager": "cargo", - "runtime": null - }, - "build": { - "buildCommand": "cargo build", - "testCommand": "cargo test", - "lintCommand": "cargo clippy", - "devCommand": "cargo run", - "scripts": {} - }, - "conventions": { - "namingStyle": null, - "importStyle": null, - "testPattern": null, - "fileOrganization": null - }, - "structure": { - "isMonorepo": false, - "workspaces": [], - "mainDirectories": [ - "examples", - "scripts", - "src", - "tests" - ], - "gitBranches": { - "defaultBranch": "master", - 
"branchingStrategy": null - } - }, - "customNotes": [], - "directoryMap": { - "agent-skills": { - "path": "agent-skills", - "purpose": null, - "fileCount": 0, - "lastAccessed": 1775233644693, - "keyFiles": [] - }, - "baseline flamegraphs": { - "path": "baseline flamegraphs", - "purpose": null, - "fileCount": 12, - "lastAccessed": 1775233644694, - "keyFiles": [ - "stage2_end_flamechart.svg", - "stage2_start_flamechart.svg", - "stage3_end_flamechart.svg", - "stage3_start_flamechart.svg", - "stage4_end_flamechart.svg" - ] - }, - "benchmark-runs": { - "path": "benchmark-runs", - "purpose": null, - "fileCount": 1, - "lastAccessed": 1775233644694, - "keyFiles": [ - "peak_memory_plot.html" - ] - }, - "book": { - "path": "book", - "purpose": null, - "fileCount": 4, - "lastAccessed": 1775233644694, - "keyFiles": [ - "README.md", - "book.toml" - ] - }, - "common": { - "path": "common", - "purpose": null, - "fileCount": 1, - "lastAccessed": 1775233644695, - "keyFiles": [ - "Cargo.toml" - ] - }, - "docker": { - "path": "docker", - "purpose": null, - "fileCount": 3, - "lastAccessed": 1775233644695, - "keyFiles": [ - "Dockerfile.spike", - "README.md", - "build.sh" - ] - }, - "examples": { - "path": "examples", - "purpose": "Example code", - "fileCount": 2, - "lastAccessed": 1775233644695, - "keyFiles": [ - "run_ci_benchmarks.sh" - ] - }, - "imgs": { - "path": "imgs", - "purpose": null, - "fileCount": 1, - "lastAccessed": 1775233644695, - "keyFiles": [ - "jolt_alpha.png" - ] - }, - "jolt-core": { - "path": "jolt-core", - "purpose": null, - "fileCount": 10, - "lastAccessed": 1775233644695, - "keyFiles": [ - "Cargo.toml", - "dory_srs_10_variables.srs", - "dory_srs_18_variables.srs", - "dory_srs_24_variables.srs", - "dory_srs_26_variables.srs" - ] - }, - "jolt-eval": { - "path": "jolt-eval", - "purpose": null, - "fileCount": 3, - "lastAccessed": 1775233644695, - "keyFiles": [ - "Cargo.toml", - "README.md", - "sync_targets.sh" - ] - }, - "jolt-inlines": { - "path": "jolt-inlines", - 
"purpose": null, - "fileCount": 0, - "lastAccessed": 1775233644696, - "keyFiles": [] - }, - "jolt-platform": { - "path": "jolt-platform", - "purpose": null, - "fileCount": 1, - "lastAccessed": 1775233644696, - "keyFiles": [ - "Cargo.toml" - ] - }, - "jolt-sdk": { - "path": "jolt-sdk", - "purpose": null, - "fileCount": 2, - "lastAccessed": 1775233644696, - "keyFiles": [ - "Cargo.toml" - ] - }, - "patches": { - "path": "patches", - "purpose": null, - "fileCount": 2, - "lastAccessed": 1775233644696, - "keyFiles": [ - "fix-c-extension-privilege-tests.patch", - "fix-isa-regex.patch" - ] - }, - "prebuilt-files": { - "path": "prebuilt-files", - "purpose": null, - "fileCount": 0, - "lastAccessed": 1775233644696, - "keyFiles": [] - }, - "scripts": { - "path": "scripts", - "purpose": "Build/utility scripts", - "fileCount": 9, - "lastAccessed": 1775233644696, - "keyFiles": [ - "apply-patches", - "benchmark_summary.py", - "bootstrap", - "jolt_benchmarks.sh", - "optimize_machine.sh" - ] - }, - "src": { - "path": "src", - "purpose": "Source code", - "fileCount": 4, - "lastAccessed": 1775233644696, - "keyFiles": [ - "build_wasm.rs", - "lib.rs", - "linker.ld.template", - "main.rs" - ] - }, - "target": { - "path": "target", - "purpose": null, - "fileCount": 3, - "lastAccessed": 1775233644696, - "keyFiles": [ - "CACHEDIR.TAG" - ] - }, - "tests": { - "path": "tests", - "purpose": "Test files", - "fileCount": 0, - "lastAccessed": 1775233644697, - "keyFiles": [] - }, - "third-party": { - "path": "third-party", - "purpose": null, - "fileCount": 0, - "lastAccessed": 1775233644697, - "keyFiles": [] - }, - "tracer": { - "path": "tracer", - "purpose": null, - "fileCount": 3, - "lastAccessed": 1775233644697, - "keyFiles": [ - "Cargo.toml", - "LICENSE", - "README.md" - ] - }, - "z3-verifier": { - "path": "z3-verifier", - "purpose": null, - "fileCount": 2, - "lastAccessed": 1775233644697, - "keyFiles": [ - "Cargo.toml", - "README.md" - ] - }, - "zklean-extractor": { - "path": 
"zklean-extractor", - "purpose": null, - "fileCount": 4, - "lastAccessed": 1775233644697, - "keyFiles": [ - "Cargo.toml", - "LICENSE", - "README.md", - "build.rs" - ] - }, - "book/src": { - "path": "book/src", - "purpose": "Source code", - "fileCount": 3, - "lastAccessed": 1775233644697, - "keyFiles": [ - "SUMMARY.md", - "intro.md" - ] - }, - "common/src": { - "path": "common/src", - "purpose": "Source code", - "fileCount": 4, - "lastAccessed": 1775233644698, - "keyFiles": [ - "attributes.rs", - "constants.rs", - "jolt_device.rs" - ] - }, - "jolt-eval/bin": { - "path": "jolt-eval/bin", - "purpose": "Executable scripts", - "fileCount": 3, - "lastAccessed": 1775233644698, - "keyFiles": [ - "measure_objectives.rs", - "optimize.rs", - "redteam.rs" - ] - }, - "jolt-eval/src": { - "path": "jolt-eval/src", - "purpose": "Source code", - "fileCount": 1, - "lastAccessed": 1775233644698, - "keyFiles": [ - "lib.rs" - ] - }, - "jolt-eval/tests": { - "path": "jolt-eval/tests", - "purpose": "Test files", - "fileCount": 3, - "lastAccessed": 1775233644698, - "keyFiles": [ - "agent_test.rs", - "integration.rs", - "macro_test.rs" - ] - }, - "jolt-platform/src": { - "path": "jolt-platform/src", - "purpose": "Source code", - "fileCount": 8, - "lastAccessed": 1775233644698, - "keyFiles": [ - "advice.rs", - "cycle_tracking.rs", - "exit.rs" - ] - }, - "jolt-sdk/src": { - "path": "jolt-sdk/src", - "purpose": "Source code", - "fileCount": 2, - "lastAccessed": 1775233644698, - "keyFiles": [ - "host_utils.rs", - "lib.rs" - ] - }, - "jolt-sdk/tests": { - "path": "jolt-sdk/tests", - "purpose": "Test files", - "fileCount": 3, - "lastAccessed": 1775233644699, - "keyFiles": [ - "README.md", - "gen-fixtures.sh", - "verifier_api.rs" - ] - }, - "tracer/src": { - "path": "tracer/src", - "purpose": "Source code", - "fileCount": 2, - "lastAccessed": 1775233644699, - "keyFiles": [ - "lib.rs", - "main.rs" - ] - }, - "z3-verifier/src": { - "path": "z3-verifier/src", - "purpose": "Source code", - 
"fileCount": 3, - "lastAccessed": 1775233644699, - "keyFiles": [ - "cpu_constraints.rs", - "lib.rs", - "virtual_sequences.rs" - ] - }, - "zklean-extractor/src": { - "path": "zklean-extractor/src", - "purpose": "Source code", - "fileCount": 10, - "lastAccessed": 1775233644699, - "keyFiles": [ - "constants.rs", - "instruction.rs", - "lean_tests.rs" - ] - } - }, - "hotPaths": [ - { - "path": "jolt-eval/src/agent/claude.rs", - "accessCount": 21, - "lastAccessed": 1775240914249, - "type": "file" - }, - { - "path": "jolt-eval/tests/agent_test.rs", - "accessCount": 9, - "lastAccessed": 1775241109679, - "type": "directory" - }, - { - "path": "jolt-eval/src/invariant/soundness.rs", - "accessCount": 7, - "lastAccessed": 1775240409793, - "type": "file" - }, - { - "path": "jolt-eval/src/invariant/synthesis/redteam.rs", - "accessCount": 7, - "lastAccessed": 1775240966753, - "type": "file" - }, - { - "path": "jolt-eval/src/objective/synthesis.rs", - "accessCount": 7, - "lastAccessed": 1775246445448, - "type": "file" - }, - { - "path": "jolt-eval/benches/prover_time_fibonacci.rs", - "accessCount": 7, - "lastAccessed": 1775246451584, - "type": "file" - }, - { - "path": "jolt-eval/src/invariant/mod.rs", - "accessCount": 4, - "lastAccessed": 1775240346261, - "type": "file" - }, - { - "path": "jolt-eval/src/agent/mod.rs", - "accessCount": 4, - "lastAccessed": 1775241053447, - "type": "file" - }, - { - "path": "jolt-eval/src", - "accessCount": 4, - "lastAccessed": 1775244964713, - "type": "directory" - }, - { - "path": "jolt-eval/sync_targets.sh", - "accessCount": 4, - "lastAccessed": 1775246732070, - "type": "file" - }, - { - "path": "jolt-eval/src/agent/mock.rs", - "accessCount": 3, - "lastAccessed": 1775240931907, - "type": "file" - }, - { - "path": "jolt-eval/src/objective/optimize.rs", - "accessCount": 2, - "lastAccessed": 1775240941569, - "type": "file" - }, - { - "path": "jolt-eval/Cargo.toml", - "accessCount": 2, - "lastAccessed": 1775245065190, - "type": "file" - }, - { - 
"path": "jolt-eval/src/objective/prover_time.rs", - "accessCount": 2, - "lastAccessed": 1775245117960, - "type": "file" - }, - { - "path": "jolt-eval/tests", - "accessCount": 1, - "lastAccessed": 1775240852235, - "type": "directory" - }, - { - "path": "jolt-eval/bin", - "accessCount": 1, - "lastAccessed": 1775240853001, - "type": "directory" - }, - { - "path": "jolt-eval/src/objective/bind_bench.rs", - "accessCount": 1, - "lastAccessed": 1775244945032, - "type": "file" - }, - { - "path": "jolt-eval/src/guests/mod.rs", - "accessCount": 1, - "lastAccessed": 1775244951427, - "type": "file" - }, - { - "path": "examples/fibonacci/guest/src", - "accessCount": 1, - "lastAccessed": 1775244971547, - "type": "directory" - }, - { - "path": "examples/muldiv/guest/src", - "accessCount": 1, - "lastAccessed": 1775244972439, - "type": "directory" - }, - { - "path": "examples/sha2-ex/guest/src", - "accessCount": 1, - "lastAccessed": 1775244973599, - "type": "directory" - }, - { - "path": "jolt-eval/src/objective/mod.rs", - "accessCount": 1, - "lastAccessed": 1775245034647, - "type": "file" - }, - { - "path": "examples/sha2-chain/guest/src", - "accessCount": 1, - "lastAccessed": 1775246992179, - "type": "directory" - }, - { - "path": "examples/secp256k1-ecdsa-verify/guest/src", - "accessCount": 1, - "lastAccessed": 1775246993084, - "type": "directory" - }, - { - "path": "examples/sha2-chain/guest/src/lib.rs", - "accessCount": 1, - "lastAccessed": 1775246993515, - "type": "file" - }, - { - "path": "examples/secp256k1-ecdsa-verify/guest/src/lib.rs", - "accessCount": 1, - "lastAccessed": 1775246994292, - "type": "file" - }, - { - "path": "examples/sha2-chain/guest/Cargo.toml", - "accessCount": 1, - "lastAccessed": 1775247000292, - "type": "file" - }, - { - "path": "examples/secp256k1-ecdsa-verify/guest/Cargo.toml", - "accessCount": 1, - "lastAccessed": 1775247001105, - "type": "file" - }, - { - "path": "examples/secp256k1-ecdsa-verify", - "accessCount": 1, - "lastAccessed": 
1775247008932, - "type": "directory" - }, - { - "path": "examples/secp256k1-ecdsa-verify/src/main.rs", - "accessCount": 1, - "lastAccessed": 1775247022060, - "type": "file" - } - ], - "userDirectives": [] -} \ No newline at end of file diff --git a/.omc/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.json b/.omc/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.json deleted file mode 100644 index 93101ce1c..000000000 --- a/.omc/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "session_id": "89b3a12b-e9ae-47c5-966d-f2fe2782fb2c", - "ended_at": "2026-04-03T16:27:08.623Z", - "reason": "prompt_input_exit", - "agents_spawned": 0, - "agents_completed": 0, - "modes_used": [] -} \ No newline at end of file diff --git a/.omc/state/agent-replay-89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.jsonl b/.omc/state/agent-replay-89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.jsonl deleted file mode 100644 index a4a6feee0..000000000 --- a/.omc/state/agent-replay-89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.jsonl +++ /dev/null @@ -1,7 +0,0 @@ -{"t":0,"agent":"system","event":"skill_invoked","skill_name":"oh-my-claudecode:omc-setup"} -{"t":0,"agent":"system","event":"skill_invoked","skill_name":"oh-my-claudecode:hud"} -{"t":0,"agent":"system","event":"skill_invoked","skill_name":"oh-my-claudecode:mcp-setup"} -{"t":0,"agent":"system","event":"keyword_detected","keyword":"autopilot"} -{"t":0,"agent":"system","event":"mode_change","mode_from":"none","mode_to":"autopilot"} -{"t":0,"agent":"system","event":"keyword_detected","keyword":"deep-interview"} -{"t":0,"agent":"system","event":"keyword_detected","keyword":"deep-interview"} diff --git a/.omc/state/hud-state.json b/.omc/state/hud-state.json deleted file mode 100644 index cfd5ade6d..000000000 --- a/.omc/state/hud-state.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "timestamp": "2026-04-03T16:31:28.279Z", - "backgroundTasks": [], - "sessionStartTimestamp": "2026-04-03T14:48:03.210Z", - "sessionId": "89b3a12b-e9ae-47c5-966d-f2fe2782fb2c" 
-} \ No newline at end of file diff --git a/.omc/state/hud-stdin-cache.json b/.omc/state/hud-stdin-cache.json deleted file mode 100644 index 627f85d27..000000000 --- a/.omc/state/hud-stdin-cache.json +++ /dev/null @@ -1 +0,0 @@ -{"session_id":"89b3a12b-e9ae-47c5-966d-f2fe2782fb2c","transcript_path":"/Users/mzhu/.claude/projects/-Users-mzhu-code-jolt/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c.jsonl","cwd":"/Users/mzhu/code/jolt/jolt-eval/src/objective","session_name":"criterion-iter-batched-baseline","model":{"id":"claude-opus-4-6[1m]","display_name":"Opus 4.6 (1M context)"},"workspace":{"current_dir":"/Users/mzhu/code/jolt/jolt-eval/src/objective","project_dir":"/Users/mzhu/code/jolt","added_dirs":[]},"version":"2.1.91","output_style":{"name":"default"},"cost":{"total_cost_usd":357.49602949999957,"total_duration_ms":97181144,"total_api_duration_ms":12085031,"total_lines_added":5334,"total_lines_removed":3109},"context_window":{"total_input_tokens":272581,"total_output_tokens":370026,"context_window_size":1000000,"current_usage":{"input_tokens":3,"output_tokens":1,"cache_creation_input_tokens":310,"cache_read_input_tokens":805136},"used_percentage":81,"remaining_percentage":19},"exceeds_200k_tokens":true} \ No newline at end of file diff --git a/.omc/state/idle-notif-cooldown.json b/.omc/state/idle-notif-cooldown.json deleted file mode 100644 index e58d8c57e..000000000 --- a/.omc/state/idle-notif-cooldown.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "lastSentAt": "2026-04-03T20:06:26.707Z" -} \ No newline at end of file diff --git a/.omc/state/last-tool-error.json b/.omc/state/last-tool-error.json deleted file mode 100644 index 719f7bc24..000000000 --- a/.omc/state/last-tool-error.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "tool_name": "Bash", - "tool_input_preview": "{\"command\":\"cargo clippy -p jolt-eval --message-format=short -q --all-targets -- -D warnings 2>&1\",\"timeout\":120000,\"description\":\"Clippy check\"}", - "error": "Exit code 
101\njolt-eval/benches/prover_time_fibonacci.rs:5:14: error[E0282]: type annotations needed\nerror: could not compile `jolt-eval` (bench \"prover_time_fibonacci\") due to 1 previous error", - "timestamp": "2026-04-03T19:41:01.456Z", - "retry_count": 5 -} \ No newline at end of file diff --git a/.omc/state/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c/autopilot-state.json b/.omc/state/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c/autopilot-state.json deleted file mode 100644 index df473fc42..000000000 --- a/.omc/state/sessions/89b3a12b-e9ae-47c5-966d-f2fe2782fb2c/autopilot-state.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "active": true, - "started_at": "2026-04-03T18:18:10.711Z", - "original_prompt": "autopilot: Update the @jolt-eval/src/invariant/soundness.rs description to reflect that the patch is now handled by the harness. Also, injecting the diff into the JSON is not the cleanest solution, try to come up with something better.", - "session_id": "89b3a12b-e9ae-47c5-966d-f2fe2782fb2c", - "project_path": "/Users/mzhu/code/jolt", - "reinforcement_count": 0, - "awaiting_confirmation": true, - "last_checked_at": "2026-04-03T18:18:10.711Z" -} \ No newline at end of file From 2a681e5f65b4168c6d638ff34135bd34d77c3387 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 16:24:58 -0400 Subject: [PATCH 46/86] refactor(jolt-eval): split guest configs into own files, add new benches Move Fibonacci, Sha2Chain, Secp256k1EcdsaVerify into separate files under guests/. Add Criterion benchmarks for sha2-chain (iterated hashing) and secp256k1-ecdsa-verify (signature verification). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/Cargo.toml | 8 ++ .../prover_time_secp256k1_ecdsa_verify.rs | 10 ++ jolt-eval/benches/prover_time_sha2_chain.rs | 10 ++ jolt-eval/src/guests/fibonacci.rs | 22 ++++ jolt-eval/src/guests/mod.rs | 124 +----------------- jolt-eval/src/guests/secp256k1_ecdsa.rs | 71 ++++++++++ jolt-eval/src/guests/sha2_chain.rs | 28 ++++ 7 files changed, 156 insertions(+), 117 deletions(-) create mode 100644 jolt-eval/benches/prover_time_secp256k1_ecdsa_verify.rs create mode 100644 jolt-eval/benches/prover_time_sha2_chain.rs create mode 100644 jolt-eval/src/guests/fibonacci.rs create mode 100644 jolt-eval/src/guests/secp256k1_ecdsa.rs create mode 100644 jolt-eval/src/guests/sha2_chain.rs diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index df818d098..15e380405 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -42,6 +42,14 @@ harness = false name = "prover_time_fibonacci" harness = false +[[bench]] +name = "prover_time_secp256k1_ecdsa_verify" +harness = false + +[[bench]] +name = "prover_time_sha2_chain" +harness = false + [[bin]] name = "measure-objectives" path = "bin/measure_objectives.rs" diff --git a/jolt-eval/benches/prover_time_secp256k1_ecdsa_verify.rs b/jolt-eval/benches/prover_time_secp256k1_ecdsa_verify.rs new file mode 100644 index 000000000..3b429c286 --- /dev/null +++ b/jolt-eval/benches/prover_time_secp256k1_ecdsa_verify.rs @@ -0,0 +1,10 @@ +use jolt_eval::guests::Secp256k1EcdsaVerify; +use jolt_eval::objective::performance::prover_time::ProverTimeObjective; + +jolt_eval::bench_objective!( + ProverTimeObjective::new(Secp256k1EcdsaVerify::default()), + config: + sample_size(10), + sampling_mode(::criterion::SamplingMode::Flat), + measurement_time(std::time::Duration::from_secs(60)), +); diff --git a/jolt-eval/benches/prover_time_sha2_chain.rs b/jolt-eval/benches/prover_time_sha2_chain.rs new file mode 100644 index 000000000..0a95ed7b9 --- /dev/null +++ 
b/jolt-eval/benches/prover_time_sha2_chain.rs @@ -0,0 +1,10 @@ +use jolt_eval::guests::Sha2Chain; +use jolt_eval::objective::performance::prover_time::ProverTimeObjective; + +jolt_eval::bench_objective!( + ProverTimeObjective::new(Sha2Chain::default()), + config: + sample_size(10), + sampling_mode(::criterion::SamplingMode::Flat), + measurement_time(std::time::Duration::from_secs(60)), +); diff --git a/jolt-eval/src/guests/fibonacci.rs b/jolt-eval/src/guests/fibonacci.rs new file mode 100644 index 000000000..69b161505 --- /dev/null +++ b/jolt-eval/src/guests/fibonacci.rs @@ -0,0 +1,22 @@ +use super::GuestConfig; + +/// Fibonacci guest: computes fib(n). +pub struct Fibonacci(pub u32); + +impl Default for Fibonacci { + fn default() -> Self { + Self(100) + } +} + +impl GuestConfig for Fibonacci { + fn package(&self) -> &str { + "fibonacci-guest" + } + fn input(&self) -> Vec { + postcard::to_stdvec(&self.0).unwrap() + } + fn bench_name(&self) -> String { + format!("prover_time_fibonacci_{}", self.0) + } +} diff --git a/jolt-eval/src/guests/mod.rs b/jolt-eval/src/guests/mod.rs index 9437d3b3d..dbd178688 100644 --- a/jolt-eval/src/guests/mod.rs +++ b/jolt-eval/src/guests/mod.rs @@ -1,3 +1,7 @@ +pub mod fibonacci; +pub mod secp256k1_ecdsa; +pub mod sha2_chain; + use ark_bn254::Fr; use jolt_core::curve::Bn254Curve; use jolt_core::poly::commitment::dory::DoryCommitmentScheme; @@ -6,8 +10,11 @@ use jolt_core::transcripts::Blake2bTranscript; use common::constants::{DEFAULT_MAX_TRUSTED_ADVICE_SIZE, DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE}; use common::jolt_device::MemoryConfig; +pub use fibonacci::Fibonacci; pub use jolt_core::guest::program::Program as GuestProgram; pub use jolt_core::utils::errors::ProofVerifyError; +pub use secp256k1_ecdsa::Secp256k1EcdsaVerify; +pub use sha2_chain::Sha2Chain; pub use tracer::JoltDevice; pub type F = Fr; @@ -118,120 +125,3 @@ pub trait GuestConfig: Default + Send + Sync { /// Display name for the benchmark. 
fn bench_name(&self) -> String; } - -// ── Concrete guest configurations ─────────────────────────────────── - -/// Fibonacci guest: computes fib(n). -pub struct Fibonacci(pub u32); - -impl Default for Fibonacci { - fn default() -> Self { - Self(100) - } -} - -impl GuestConfig for Fibonacci { - fn package(&self) -> &str { - "fibonacci-guest" - } - fn input(&self) -> Vec { - postcard::to_stdvec(&self.0).unwrap() - } - fn bench_name(&self) -> String { - format!("prover_time_fibonacci_{}", self.0) - } -} - -/// SHA-2 chain guest: iteratively hashes input `num_iters` times. -pub struct Sha2Chain { - pub input: [u8; 32], - pub num_iters: u32, -} - -impl Default for Sha2Chain { - fn default() -> Self { - Self { - input: [5u8; 32], - num_iters: 100, - } - } -} - -impl GuestConfig for Sha2Chain { - fn package(&self) -> &str { - "sha2-chain-guest" - } - fn input(&self) -> Vec { - postcard::to_stdvec(&(self.input, self.num_iters)).unwrap() - } - fn bench_name(&self) -> String { - format!("prover_time_sha2_chain_{}", self.num_iters) - } -} - -/// Secp256k1 ECDSA signature verification guest. 
-pub struct Secp256k1EcdsaVerify { - pub z: [u64; 4], - pub r: [u64; 4], - pub s: [u64; 4], - pub q: [u64; 8], -} - -impl Default for Secp256k1EcdsaVerify { - fn default() -> Self { - // Test vector from examples/secp256k1-ecdsa-verify: "hello world" - Self { - z: [ - 0x9088f7ace2efcde9, - 0xc484efe37a5380ee, - 0xa52e52d7da7dabfa, - 0xb94d27b9934d3e08, - ], - r: [ - 0xb8fc413b4b967ed8, - 0x248d4b0b2829ab00, - 0x587f69296af3cd88, - 0x3a5d6a386e6cf7c0, - ], - s: [ - 0x66a82f274e3dcafc, - 0x299a02486be40321, - 0x6212d714118f617e, - 0x9d452f63cf91018d, - ], - q: [ - 0x0012563f32ed0216, - 0xee00716af6a73670, - 0x91fc70e34e00e6c8, - 0xeeb6be8b9e68868b, - 0x4780de3d5fda972d, - 0xcb1b42d72491e47f, - 0xdc7f31262e4ba2b7, - 0xdc7b004d3bb2800d, - ], - } - } -} - -impl GuestConfig for Secp256k1EcdsaVerify { - fn package(&self) -> &str { - "secp256k1-ecdsa-verify-guest" - } - fn memory_config(&self) -> MemoryConfig { - MemoryConfig { - max_input_size: 4096, - max_output_size: 4096, - max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, - max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE, - stack_size: 4096, - heap_size: 100000, - program_size: None, - } - } - fn input(&self) -> Vec { - postcard::to_stdvec(&(self.z, self.r, self.s, self.q)).unwrap() - } - fn bench_name(&self) -> String { - "prover_time_secp256k1_ecdsa_verify".to_string() - } -} diff --git a/jolt-eval/src/guests/secp256k1_ecdsa.rs b/jolt-eval/src/guests/secp256k1_ecdsa.rs new file mode 100644 index 000000000..2500fadab --- /dev/null +++ b/jolt-eval/src/guests/secp256k1_ecdsa.rs @@ -0,0 +1,71 @@ +use common::jolt_device::MemoryConfig; + +use super::GuestConfig; + +/// Secp256k1 ECDSA signature verification guest. 
+pub struct Secp256k1EcdsaVerify { + pub z: [u64; 4], + pub r: [u64; 4], + pub s: [u64; 4], + pub q: [u64; 8], +} + +impl Default for Secp256k1EcdsaVerify { + fn default() -> Self { + // Test vector from examples/secp256k1-ecdsa-verify: "hello world" + Self { + z: [ + 0x9088f7ace2efcde9, + 0xc484efe37a5380ee, + 0xa52e52d7da7dabfa, + 0xb94d27b9934d3e08, + ], + r: [ + 0xb8fc413b4b967ed8, + 0x248d4b0b2829ab00, + 0x587f69296af3cd88, + 0x3a5d6a386e6cf7c0, + ], + s: [ + 0x66a82f274e3dcafc, + 0x299a02486be40321, + 0x6212d714118f617e, + 0x9d452f63cf91018d, + ], + q: [ + 0x0012563f32ed0216, + 0xee00716af6a73670, + 0x91fc70e34e00e6c8, + 0xeeb6be8b9e68868b, + 0x4780de3d5fda972d, + 0xcb1b42d72491e47f, + 0xdc7f31262e4ba2b7, + 0xdc7b004d3bb2800d, + ], + } + } +} + +impl GuestConfig for Secp256k1EcdsaVerify { + fn package(&self) -> &str { + "secp256k1-ecdsa-verify-guest" + } + fn memory_config(&self) -> MemoryConfig { + use common::constants::{DEFAULT_MAX_TRUSTED_ADVICE_SIZE, DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE}; + MemoryConfig { + max_input_size: 4096, + max_output_size: 4096, + max_untrusted_advice_size: DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, + max_trusted_advice_size: DEFAULT_MAX_TRUSTED_ADVICE_SIZE, + stack_size: 4096, + heap_size: 100000, + program_size: None, + } + } + fn input(&self) -> Vec { + postcard::to_stdvec(&(self.z, self.r, self.s, self.q)).unwrap() + } + fn bench_name(&self) -> String { + "prover_time_secp256k1_ecdsa_verify".to_string() + } +} diff --git a/jolt-eval/src/guests/sha2_chain.rs b/jolt-eval/src/guests/sha2_chain.rs new file mode 100644 index 000000000..f4c14eb59 --- /dev/null +++ b/jolt-eval/src/guests/sha2_chain.rs @@ -0,0 +1,28 @@ +use super::GuestConfig; + +/// SHA-2 chain guest: iteratively hashes input `num_iters` times. 
+pub struct Sha2Chain { + pub input: [u8; 32], + pub num_iters: u32, +} + +impl Default for Sha2Chain { + fn default() -> Self { + Self { + input: [5u8; 32], + num_iters: 100, + } + } +} + +impl GuestConfig for Sha2Chain { + fn package(&self) -> &str { + "sha2-chain-guest" + } + fn input(&self) -> Vec { + postcard::to_stdvec(&(self.input, self.num_iters)).unwrap() + } + fn bench_name(&self) -> String { + format!("prover_time_sha2_chain_{}", self.num_iters) + } +} From 84001070f27bbc45d57104617d6b640a0317ef87 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 16:27:36 -0400 Subject: [PATCH 47/86] docs(jolt-eval): update README to reflect current architecture Rewrite to match actual invariants (split_eq_bind, soundness), code quality objectives (lloc, cognitive, halstead), performance benchmarks (bind_parallel, prover_time), macro syntax (#[invariant(Test, Fuzz)], fuzz_invariant!, bench_objective!), DiffScope, enrich_input, Criterion integration, sync_targets.sh, and removed features. Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/README.md | 215 ++++++++++++++++++++++++-------------------- 1 file changed, 116 insertions(+), 99 deletions(-) diff --git a/jolt-eval/README.md b/jolt-eval/README.md index 713af3947..b22a7480b 100644 --- a/jolt-eval/README.md +++ b/jolt-eval/README.md @@ -3,50 +3,56 @@ Mechanically checkable **invariants** and **objectives** for the Jolt zkVM. The motivation is twofold: -1. **Maximize agent productivity** -- give AI agents a way to check their work without a human in the loop. -2. **Minimize the human verification surface** -- humans gain assurance about the larger codebase while only focusing on a smaller kernel of invariants and objectives. +1. **Maximize agent productivity** — give AI agents a way to check their work without a human in the loop. +2. **Minimize the human verification surface** — humans gain assurance about the larger codebase while only focusing on a smaller kernel of invariants and objectives. 
## Concepts -**Invariants** are evaluations with a binary outcome -- things that must always hold. From a single invariant description (a small amount of Rust), the framework can synthesize: -- A `#[test]` -- A `libfuzzer_sys` fuzz target +**Invariants** are evaluations with a binary outcome — things that must always hold. From a single invariant description (a small amount of Rust), the framework can synthesize: +- A `#[test]` (via the `#[invariant(Test, Fuzz)]` macro) +- A `libfuzzer_sys` fuzz target (via the `fuzz_invariant!` macro) - A "red team" harness for AI agents to try to find a violation -**Objectives** are evaluations with a numerical outcome -- things we want to optimize. They serve as building blocks for AI-driven optimization loops. +**Objectives** come in two flavors: +- **Code quality** (static analysis) — measured via `rust-code-analysis`: LLOC, cognitive complexity, Halstead bugs +- **Performance** (benchmarks) — measured via Criterion: polynomial binding, end-to-end prover time ## Built-in invariants -| Invariant | Description | -|---|---| -| **Soundness** | Mutated proofs must be rejected by the verifier | -| **Verifier completeness** | Honest proofs must be accepted by the verifier | -| **Prover completeness** | The prover must not panic on valid inputs | -| **Determinism** | Same program + input produces byte-identical proofs | -| **Serialization roundtrip** | `deserialize(serialize(proof)) == proof` | -| **ZK consistency** | Prove + verify succeeds in the current compilation mode (run with both `--features host` and `--features host,zk`) | +| Invariant | Targets | Description | +|---|---|---| +| `split_eq_bind_low_high` | Test, Fuzz | `GruenSplitEqPolynomial::bind` (LowToHigh) matches `DensePolynomial::bound_poly_var_bot` | +| `split_eq_bind_high_low` | Test, Fuzz | `GruenSplitEqPolynomial::bind` (HighToLow) matches `DensePolynomial::bound_poly_var_top` | +| `soundness` | RedTeam | For any deterministic guest program + input, only one 
(output, panic) pair is accepted by the verifier | ## Built-in objectives -| Objective | Direction | Description | -|---|---|---| -| `peak_rss` | Minimize | Peak resident set size during proving (MB) | -| `prover_time` | Minimize | Wall-clock prover time (seconds) | -| `proof_size` | Minimize | Serialized proof byte length | -| `verifier_time` | Minimize | Wall-clock verifier time (seconds) | -| `guest_cycle_count` | Minimize | Guest instruction cycle count | -| `inline_lengths` | Maximize | Count of optimized inline instructions | -| `wrapping_cost` | Minimize | Constraint system size (padded trace length) | +### Code quality (static analysis) + +| Objective | Direction | Units | Description | +|---|---|---|---| +| `lloc` | Minimize | lines | Total logical lines of code in `jolt-core/src/` | +| `cognitive_complexity_avg` | Minimize | — | Average cognitive complexity per function | +| `halstead_bugs` | Minimize | — | Estimated delivered bugs (Halstead volume / 3000) | + +### Performance (Criterion benchmarks) + +| Benchmark | Description | +|---|---| +| `bind_parallel_low_to_high` | `DensePolynomial::bind_parallel` with LowToHigh binding (2^20 evaluations) | +| `bind_parallel_high_to_low` | `DensePolynomial::bind_parallel` with HighToLow binding (2^20 evaluations) | +| `prover_time_fibonacci_100` | End-to-end prover time for `fibonacci(100)` | +| `prover_time_sha2_chain_100` | End-to-end prover time for 100 iterations of SHA-256 chain | +| `prover_time_secp256k1_ecdsa_verify` | End-to-end prover time for secp256k1 ECDSA signature verification | ## Usage ### Defining an invariant ```rust -use jolt_eval::{Invariant, InvariantViolation, SynthesisTarget}; -use enumset::EnumSet; +use jolt_eval::{Invariant, CheckError, InvariantViolation}; -#[jolt_eval_macros::invariant(targets = [Test, Fuzz, RedTeam])] +#[jolt_eval_macros::invariant(Test, Fuzz)] #[derive(Default)] pub struct MyInvariant; @@ -58,11 +64,8 @@ impl Invariant for MyInvariant { fn description(&self) -> String 
{ "Human-readable description, also used as context for AI red-teaming.".into() } - fn targets(&self) -> EnumSet { - SynthesisTarget::Test | SynthesisTarget::Fuzz | SynthesisTarget::RedTeam - } fn setup(&self) -> Self::Setup {} - fn check(&self, _setup: &(), input: u64) -> Result<(), InvariantViolation> { + fn check(&self, _setup: &(), input: u64) -> Result<(), CheckError> { // ... check the invariant ... Ok(()) } @@ -72,112 +75,126 @@ impl Invariant for MyInvariant { } ``` -The `#[invariant]` macro generates: -- `#[test] fn seed_corpus()` -- runs every seed input through `check` -- `#[test] fn random_inputs()` -- runs 10 randomly-generated `Arbitrary` inputs -- `my_invariant_fuzz_check(data: &[u8])` -- call from a `fuzz_target!` body -- `my_invariant_redteam_description() -> String` -- for the red-team harness +The `#[invariant(Test, Fuzz)]` macro generates: +- `InvariantTargets` impl returning the listed targets +- `#[test] fn seed_corpus()` — runs every seed input through `check` +- `#[test] fn random_inputs()` — runs `JOLT_RANDOM_ITERS` (default 10) randomly-generated inputs -### Running invariants with the CLI +### Fuzzing -```bash -# Check all invariants against a compiled guest ELF -cargo run --bin check-invariants -- --elf path/to/guest.elf +Each fuzz target is a 3-line file in `fuzz/fuzz_targets/`: -# Check a specific invariant with more random inputs -cargo run --bin check-invariants -- --elf path/to/guest.elf \ - --invariant soundness --num-random 100 +```rust +#![no_main] +use jolt_eval::invariant::split_eq_bind::SplitEqBindLowHighInvariant; +jolt_eval::fuzz_invariant!(SplitEqBindLowHighInvariant::default()); ``` -### Fuzzing - -The `fuzz` binary runs randomized inputs (via the `Arbitrary` trait) against invariants that include `SynthesisTarget::Fuzz`: - +Run with `cargo fuzz`: ```bash -# Fuzz all invariants with 1000 random inputs -cargo run --bin fuzz -- --elf path/to/guest.elf --iterations 1000 - -# Fuzz a specific invariant with a time limit -cargo 
run --bin fuzz -- --elf path/to/guest.elf \ - --invariant soundness --duration 5m - -# List available fuzzable invariants -cargo run --bin fuzz -- --list +cd jolt-eval/fuzz +cargo fuzz run split_eq_bind_low_high ``` -For deeper coverage, the `#[invariant]` macro generates a `_fuzz_check` function suitable for use with `cargo fuzz` / `libfuzzer_sys`: +### Measuring objectives -```rust -// fuzz/fuzz_targets/soundness.rs -#![no_main] -use libfuzzer_sys::fuzz_target; +```bash +# All objectives (static analysis + Criterion benchmarks) +cargo run -p jolt-eval --bin measure-objectives -fuzz_target!(|data: &[u8]| { - my_crate::my_soundness_invariant_fuzz_check(data); -}); +# Static analysis only (skip benchmarks) +cargo run -p jolt-eval --bin measure-objectives -- --no-bench + +# A specific objective +cargo run -p jolt-eval --bin measure-objectives -- --objective lloc ``` -### Measuring objectives +### Running Criterion benchmarks directly ```bash -# Measure all objectives -cargo run --bin measure-objectives -- --elf path/to/guest.elf +# All benchmarks +cargo bench -p jolt-eval + +# A specific benchmark +cargo bench -p jolt-eval --bench prover_time_fibonacci -# Measure a specific objective with multiple samples -cargo run --bin measure-objectives -- --elf path/to/guest.elf \ - --objective prover_time --samples 5 +# Quick mode (fewer samples) +cargo bench -p jolt-eval --bench bind_parallel_low_to_high -- --quick ``` +Criterion results are saved to `target/criterion/` (symlinked from `jolt-eval/benches/results/`). + ### AI red-teaming ```bash -cargo run --bin redteam -- --elf path/to/guest.elf \ - --invariant soundness --iterations 10 --model claude-sonnet-4-20250514 +cargo run --release -p jolt-eval --bin redteam -- \ + --invariant soundness --iterations 10 \ + --hint "Look for edge cases in the memory layout" ``` -The red-team harness runs the AI agent in an isolated git worktree. The invariant is checked in the original working tree so the agent cannot cheat. 
+The red-team harness runs the AI agent in an isolated git worktree. For the soundness invariant, the agent can edit `guest-sandbox/` directly — the harness captures the diff automatically via `git diff`. ### AI-driven optimization ```bash -# Optimize prover_time and proof_size over 5 iterations -cargo run --bin optimize -- --elf path/to/guest.elf \ - --objectives prover_time,proof_size --iterations 5 - -# With a hint to guide the agent -cargo run --bin optimize -- --elf path/to/guest.elf \ - --hint "Focus on the sumcheck inner loop in jolt-core/src/subprotocols/" +cargo run --release -p jolt-eval --bin optimize -- \ + --objectives lloc,cognitive_complexity_avg --iterations 5 \ + --hint "Focus on reducing complexity in jolt-core/src/subprotocols/" ``` -Each iteration: the agent works in an isolated worktree, the diff is applied to the real repo, objectives are re-measured, invariants are checked, and the change is committed or reverted. +Each iteration: the agent works in an isolated worktree, the diff is applied, objectives are re-measured (including Criterion benchmarks with `--save-baseline`), invariants are checked, and the change is committed or reverted. 
+ +### Defining a performance benchmark + +Implement `PerfObjective` and create a bench file: + +```rust +// src/objective/performance/my_bench.rs +use crate::objective::PerfObjective; + +#[derive(Default)] +pub struct MyBenchObjective; -### Programmatic API +impl PerfObjective for MyBenchObjective { + type Setup = MySetup; + fn name(&self) -> &str { "my_bench" } + fn setup(&self) -> MySetup { /* one-time setup */ } + fn run(&self, setup: MySetup) { /* hot path */ } +} +``` ```rust -use std::sync::Arc; -use jolt_eval::{TestCase, SharedSetup, check_all_invariants}; -use jolt_eval::invariant::soundness::SoundnessInvariant; - -// Create a test case from a compiled guest program -let test_case = Arc::new(TestCase { elf_contents, memory_config, max_trace_length: 65536 }); - -// Run a specific invariant -let inv = SoundnessInvariant::new(Arc::clone(&test_case), default_inputs); -let results = inv.run_checks(/* num_random */ 10); - -// Or measure objectives -use jolt_eval::objective::{Objective, prover_time::ProverTimeObjective}; -let setup = SharedSetup::new(test_case); -let obj = ProverTimeObjective::new(setup.test_case, setup.prover_preprocessing, inputs); -let seconds = obj.collect_measurement().unwrap(); +// benches/my_bench.rs +use jolt_eval::objective::performance::my_bench::MyBenchObjective; +jolt_eval::bench_objective!(MyBenchObjective); + +// Or with custom Criterion config for slow benchmarks: +jolt_eval::bench_objective!( + MyBenchObjective::default(), + config: sample_size(10), sampling_mode(::criterion::SamplingMode::Flat), +); +``` + +Then run `./sync_targets.sh` to update `Cargo.toml` bench entries. + +## Syncing targets + +```bash +./jolt-eval/sync_targets.sh ``` +This script: +- Scans `src/invariant/` for `#[invariant(...Fuzz...)]` structs and generates/removes fuzz target files + `fuzz/Cargo.toml` entries +- Scans `benches/*.rs` and updates `Cargo.toml` `[[bench]]` entries + +Bench files are hand-authored (they carry domain-specific Criterion config). 
The script only syncs `Cargo.toml` entries from existing files. + ## Framing tasks in terms of invariants and objectives | Task | Invariants | Objectives | |---|---|---| | **New feature** | Add new invariants capturing the feature's behavior; modify existing invariants as necessary | Document expected impact; mechanically validate | | **Bug fix** | Add/modify invariant to fail without the fix; verify all others still hold | Document impact | -| **Security review** | Try to find a counterexample to some invariant (via red-team) | -- | -| **Optimization** | Ensure all invariants still hold | Maximize an objective function $f(o_1, \ldots, o_n)$ | +| **Security review** | Try to find a counterexample to some invariant (via red-team) | — | +| **Optimization** | Ensure all invariants still hold | Maximize an objective function f(o₁, …, oₙ) | | **Refactor** | Ensure all invariants still hold | Special case of optimization where the objective captures code quality | From 88807b23e22d02a7d2caf372cd02b450f1a379a6 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 16:38:33 -0400 Subject: [PATCH 48/86] fix(jolt-eval): register jolt-inlines for secp256k1 and sha2 benchmarks Add jolt-inlines-secp256k1 and jolt-inlines-sha2 as dependencies with the host feature. Use extern crate in lib.rs to force the linker to keep the inventory::submit! inline registrations, preventing "No inline registered for opcode" panics at runtime. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/Cargo.toml | 3 +++ jolt-eval/src/lib.rs | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index 15e380405..606e2f9f0 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -25,6 +25,9 @@ tempfile = "3" rust-code-analysis = "0.0.24" +jolt-inlines-secp256k1 = { workspace = true, features = ["host"] } +jolt-inlines-sha2 = { workspace = true, features = ["host"] } + jolt-eval-macros = { path = "macros" } [dev-dependencies] diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index f3841d008..8b911a239 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -3,6 +3,12 @@ // Allow `jolt_eval::` paths in macro-generated code within this crate. extern crate self as jolt_eval; +// Force the linker to keep inline instruction registrations from these +// crates. Without this, inventory::submit! symbols get dead-stripped +// and the tracer panics with "No inline registered for opcode=...". 
+extern crate jolt_inlines_secp256k1; +extern crate jolt_inlines_sha2; + pub mod agent; pub mod guests; pub mod invariant; From e3bae5d3a7373ac1ff92fa11858fc8aaa45212b6 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 16:41:29 -0400 Subject: [PATCH 49/86] rename(jolt-eval): bind_bench.rs -> binding.rs --- Cargo.lock | 2 ++ jolt-eval/benches/bind_parallel_high_to_low.rs | 2 +- jolt-eval/benches/bind_parallel_low_to_high.rs | 2 +- jolt-eval/src/objective/mod.rs | 4 ++-- .../src/objective/performance/{bind_bench.rs => binding.rs} | 0 jolt-eval/src/objective/performance/mod.rs | 2 +- 6 files changed, 7 insertions(+), 5 deletions(-) rename jolt-eval/src/objective/performance/{bind_bench.rs => binding.rs} (100%) diff --git a/Cargo.lock b/Cargo.lock index 701f3f15b..049f53926 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2971,6 +2971,8 @@ dependencies = [ "eyre", "jolt-core", "jolt-eval-macros", + "jolt-inlines-secp256k1", + "jolt-inlines-sha2", "postcard", "rand 0.8.5", "rust-code-analysis", diff --git a/jolt-eval/benches/bind_parallel_high_to_low.rs b/jolt-eval/benches/bind_parallel_high_to_low.rs index ffb2343e8..e776a5fb2 100644 --- a/jolt-eval/benches/bind_parallel_high_to_low.rs +++ b/jolt-eval/benches/bind_parallel_high_to_low.rs @@ -1,2 +1,2 @@ -use jolt_eval::objective::performance::bind_bench::BindHighToLowObjective; +use jolt_eval::objective::performance::binding::BindHighToLowObjective; jolt_eval::bench_objective!(BindHighToLowObjective); diff --git a/jolt-eval/benches/bind_parallel_low_to_high.rs b/jolt-eval/benches/bind_parallel_low_to_high.rs index 3b621673e..2af064f94 100644 --- a/jolt-eval/benches/bind_parallel_low_to_high.rs +++ b/jolt-eval/benches/bind_parallel_low_to_high.rs @@ -1,2 +1,2 @@ -use jolt_eval::objective::performance::bind_bench::BindLowToHighObjective; +use jolt_eval::objective::performance::binding::BindLowToHighObjective; jolt_eval::bench_objective!(BindLowToHighObjective); diff --git a/jolt-eval/src/objective/mod.rs 
b/jolt-eval/src/objective/mod.rs index 2a1b1fb06..630be7ec4 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -125,8 +125,8 @@ impl Objective { /// Names of all registered `PerfObjective` benchmarks. pub fn perf_objective_names() -> &'static [&'static str] { &[ - performance::bind_bench::BindLowToHighObjective::NAME, - performance::bind_bench::BindHighToLowObjective::NAME, + performance::binding::BindLowToHighObjective::NAME, + performance::binding::BindHighToLowObjective::NAME, ] } diff --git a/jolt-eval/src/objective/performance/bind_bench.rs b/jolt-eval/src/objective/performance/binding.rs similarity index 100% rename from jolt-eval/src/objective/performance/bind_bench.rs rename to jolt-eval/src/objective/performance/binding.rs diff --git a/jolt-eval/src/objective/performance/mod.rs b/jolt-eval/src/objective/performance/mod.rs index 9e5754c0f..12df6f00c 100644 --- a/jolt-eval/src/objective/performance/mod.rs +++ b/jolt-eval/src/objective/performance/mod.rs @@ -1,2 +1,2 @@ -pub mod bind_bench; +pub mod binding; pub mod prover_time; From c03f880d97ebe03bcd618a3e864e7cf16b674782 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 16:46:13 -0400 Subject: [PATCH 50/86] refactor(jolt-eval): move tests into their relevant source directories Move agent_test.rs -> src/agent/tests.rs Move macro_test.rs -> src/invariant/macro_tests.rs Inline integration.rs tests into invariant/mod.rs and objective/mod.rs as #[cfg(test)] modules. Remove the tests/ directory. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/agent/mod.rs | 2 + .../agent_test.rs => src/agent/tests.rs} | 11 +- .../invariant/macro_tests.rs} | 2 +- jolt-eval/src/invariant/mod.rs | 51 ++++++++ jolt-eval/src/objective/mod.rs | 42 ++++++ jolt-eval/tests/integration.rs | 120 ------------------ 6 files changed, 102 insertions(+), 126 deletions(-) rename jolt-eval/{tests/agent_test.rs => src/agent/tests.rs} (99%) rename jolt-eval/{tests/macro_test.rs => src/invariant/macro_tests.rs} (97%) delete mode 100644 jolt-eval/tests/integration.rs diff --git a/jolt-eval/src/agent/mod.rs b/jolt-eval/src/agent/mod.rs index 19d370fb2..4d3fc377b 100644 --- a/jolt-eval/src/agent/mod.rs +++ b/jolt-eval/src/agent/mod.rs @@ -1,5 +1,7 @@ pub mod claude; pub mod mock; +#[cfg(test)] +mod tests; use std::fmt; use std::path::Path; diff --git a/jolt-eval/tests/agent_test.rs b/jolt-eval/src/agent/tests.rs similarity index 99% rename from jolt-eval/tests/agent_test.rs rename to jolt-eval/src/agent/tests.rs index a85ef418c..f5d62784a 100644 --- a/jolt-eval/tests/agent_test.rs +++ b/jolt-eval/src/agent/tests.rs @@ -2,13 +2,14 @@ use std::collections::HashMap; use std::path::Path; use enumset::EnumSet; -use jolt_eval::agent::{AgentError, AgentHarness, AgentResponse, DiffScope, MockAgent}; -use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; -use jolt_eval::invariant::{ + +use crate::agent::{AgentError, AgentHarness, AgentResponse, DiffScope, MockAgent}; +use crate::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; +use crate::invariant::{ CheckError, Invariant, InvariantTargets, InvariantViolation, SynthesisTarget, }; -use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; -use jolt_eval::objective::Direction; +use crate::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; +use crate::objective::Direction; // 
========================================================================= // Test invariants diff --git a/jolt-eval/tests/macro_test.rs b/jolt-eval/src/invariant/macro_tests.rs similarity index 97% rename from jolt-eval/tests/macro_test.rs rename to jolt-eval/src/invariant/macro_tests.rs index e935a78cb..d87448252 100644 --- a/jolt-eval/tests/macro_test.rs +++ b/jolt-eval/src/invariant/macro_tests.rs @@ -1,4 +1,4 @@ -use jolt_eval::invariant::{CheckError, Invariant, InvariantViolation}; +use crate::invariant::{CheckError, Invariant, InvariantViolation}; // --------------------------------------------------------------------------- // AlwaysPass: trivial invariant to test macro synthesis diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index 56898f9f8..b6f780872 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -1,6 +1,8 @@ pub mod soundness; pub mod split_eq_bind; pub mod synthesis; +#[cfg(test)] +mod macro_tests; use std::fmt; @@ -253,3 +255,52 @@ pub fn extract_json(text: &str) -> Option { None } + +#[cfg(test)] +mod integration_tests { + use super::*; + + struct TrivialInvariant; + impl InvariantTargets for TrivialInvariant {} + + impl Invariant for TrivialInvariant { + type Setup = (); + type Input = u8; + fn name(&self) -> &str { "trivial" } + fn description(&self) -> String { "Always passes".into() } + fn setup(&self) {} + fn check(&self, _: &(), _: u8) -> Result<(), CheckError> { Ok(()) } + fn seed_corpus(&self) -> Vec { vec![0, 1, 255] } + } + + struct FailingInvariant; + impl InvariantTargets for FailingInvariant {} + + impl Invariant for FailingInvariant { + type Setup = (); + type Input = u8; + fn name(&self) -> &str { "failing" } + fn description(&self) -> String { "Always fails".into() } + fn setup(&self) {} + fn check(&self, _: &(), input: u8) -> Result<(), CheckError> { + Err(CheckError::Violation(InvariantViolation::new(format!("failed for input {input}")))) + } + fn seed_corpus(&self) -> 
Vec { vec![42] } + } + + #[test] + fn trivial_invariant_passes() { + let inv = TrivialInvariant; + for input in inv.seed_corpus() { + inv.check(&(), input).unwrap(); + } + } + + #[test] + fn failing_invariant_reports_violations() { + let inv = FailingInvariant; + for input in inv.seed_corpus() { + assert!(inv.check(&(), input).is_err()); + } + } +} diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 630be7ec4..fefd62e6a 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -137,3 +137,45 @@ pub struct OptimizationAttempt { pub measurements: std::collections::HashMap, pub invariants_passed: bool, } + +#[cfg(test)] +mod tests { + use super::*; + + struct ConstantObjective { + label: &'static str, + value: f64, + direction: Direction, + } + + impl AbstractObjective for ConstantObjective { + fn name(&self) -> &str { self.label } + fn collect_measurement(&self) -> Result { Ok(self.value) } + fn direction(&self) -> Direction { self.direction } + } + + #[test] + fn constant_objective() { + let obj = ConstantObjective { + label: "latency", + value: 42.0, + direction: Direction::Minimize, + }; + assert_eq!(obj.name(), "latency"); + assert_eq!(obj.collect_measurement().unwrap(), 42.0); + assert_eq!(obj.direction(), Direction::Minimize); + } + + #[test] + fn objective_all() { + let root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap(); + let objectives = Objective::all(root); + assert_eq!(objectives.len(), 3); + for obj in &objectives { + let val = obj.collect_measurement().unwrap(); + assert!(val > 0.0, "{} should be > 0, got {val}", obj.name()); + } + } +} diff --git a/jolt-eval/tests/integration.rs b/jolt-eval/tests/integration.rs deleted file mode 100644 index 5b48d9f87..000000000 --- a/jolt-eval/tests/integration.rs +++ /dev/null @@ -1,120 +0,0 @@ -use jolt_eval::invariant::{CheckError, Invariant, InvariantTargets, InvariantViolation}; -use jolt_eval::objective::{AbstractObjective, 
Direction, MeasurementError}; - -/// A trivial invariant for testing the framework itself. -struct TrivialInvariant; -impl InvariantTargets for TrivialInvariant {} - -impl Invariant for TrivialInvariant { - type Setup = (); - type Input = u8; - - fn name(&self) -> &str { - "trivial" - } - - fn description(&self) -> String { - "Always passes".to_string() - } - - fn setup(&self) -> Self::Setup {} - - fn check(&self, _setup: &Self::Setup, _input: u8) -> Result<(), CheckError> { - Ok(()) - } - - fn seed_corpus(&self) -> Vec { - vec![0, 1, 255] - } -} - -/// An invariant that always fails, for testing violation reporting. -struct FailingInvariant; -impl InvariantTargets for FailingInvariant {} - -impl Invariant for FailingInvariant { - type Setup = (); - type Input = u8; - - fn name(&self) -> &str { - "failing" - } - - fn description(&self) -> String { - "Always fails".to_string() - } - - fn setup(&self) -> Self::Setup {} - - fn check(&self, _setup: &Self::Setup, input: u8) -> Result<(), CheckError> { - Err(CheckError::Violation(InvariantViolation::new(format!( - "failed for input {input}" - )))) - } - - fn seed_corpus(&self) -> Vec { - vec![42] - } -} - -/// A trivial objective for testing the framework. 
-struct ConstantObjective { - label: &'static str, - value: f64, - direction: Direction, -} - -impl AbstractObjective for ConstantObjective { - fn name(&self) -> &str { - self.label - } - - fn collect_measurement(&self) -> Result { - Ok(self.value) - } - - fn direction(&self) -> Direction { - self.direction - } -} - -#[test] -fn test_trivial_invariant_passes() { - let inv = TrivialInvariant; - for input in inv.seed_corpus() { - inv.check(&(), input).unwrap(); - } -} - -#[test] -fn test_failing_invariant_reports_violations() { - let inv = FailingInvariant; - for input in inv.seed_corpus() { - assert!(inv.check(&(), input).is_err()); - } -} - -#[test] -fn test_constant_objective() { - let obj = ConstantObjective { - label: "latency", - value: 42.0, - direction: Direction::Minimize, - }; - assert_eq!(obj.name(), "latency"); - assert_eq!(obj.collect_measurement().unwrap(), 42.0); - assert_eq!(obj.direction(), Direction::Minimize); -} - -#[test] -fn test_objective_all() { - let root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap(); - let objectives = jolt_eval::Objective::all(root); - assert_eq!(objectives.len(), 3); - for obj in &objectives { - let val = obj.collect_measurement().unwrap(); - assert!(val > 0.0, "{} should be > 0, got {val}", obj.name()); - } -} From 07a0c6035b33f477f83f556e5148721b466bd7ba Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 17:02:17 -0400 Subject: [PATCH 51/86] refactor(jolt-eval): replace Direction with ObjectiveFunction trait Remove the Direction enum. The optimizer always minimizes a scalar ObjectiveFunction that combines raw measurements into a single f64. To maximize something, negate it in the ObjectiveFunction impl. Add ObjectiveFunction trait with description() and evaluate() methods. Add SingleObjective for the common case of minimizing one named measurement. OptimizeEnv no longer has directions(). Simplify auto_optimize to compare scalar scores. 
Update optimize binary to take --objective (single name). Simplify measure-objectives output (no Direction column). Update all tests. Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/measure_objectives.rs | 21 +- jolt-eval/bin/optimize.rs | 111 ++---- jolt-eval/src/agent/tests.rs | 357 ++++++------------ jolt-eval/src/lib.rs | 2 +- .../src/objective/code_quality/cognitive.rs | 5 +- .../objective/code_quality/halstead_bugs.rs | 5 +- jolt-eval/src/objective/code_quality/lloc.rs | 6 +- jolt-eval/src/objective/mod.rs | 27 -- jolt-eval/src/objective/optimize.rs | 150 +++++--- 9 files changed, 240 insertions(+), 444 deletions(-) diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs index 80368ce92..f85e61086 100644 --- a/jolt-eval/bin/measure_objectives.rs +++ b/jolt-eval/bin/measure_objectives.rs @@ -19,14 +19,14 @@ struct Cli { fn print_header() { println!( - "{:<35} {:>15} {:>8} {:>10}", - "Objective", "Value", "Units", "Direction" + "{:<35} {:>15} {:>8}", + "Objective", "Value", "Units" ); - println!("{}", "-".repeat(70)); + println!("{}", "-".repeat(60)); } -fn print_row(name: &str, val: f64, units: &str, dir: &str) { - println!("{:<35} {:>15.6} {:>8} {:>10}", name, val, units, dir); +fn print_row(name: &str, val: f64, units: &str) { + println!("{:<35} {:>15.6} {:>8}", name, val, units); } fn main() -> eyre::Result<()> { @@ -70,7 +70,7 @@ fn main() -> eyre::Result<()> { } } match read_criterion_estimate(name) { - Some(secs) => print_row(name, secs, "s", "min"), + Some(secs) => print_row(name, secs, "s"), None => { println!("{:<35} {:>15}", name, "NO DATA"); } @@ -93,12 +93,8 @@ fn main() -> eyre::Result<()> { } match obj.collect_measurement() { Ok(val) => { - let dir = match obj.direction() { - jolt_eval::Direction::Minimize => "min", - jolt_eval::Direction::Maximize => "max", - }; let units = obj.units().unwrap_or("-"); - print_row(obj.name(), val, units, dir); + print_row(obj.name(), val, units); } Err(e) => { 
println!("{:<35} {:>15}", obj.name(), format!("ERROR: {e}")); @@ -110,8 +106,6 @@ fn main() -> eyre::Result<()> { } /// Read the point estimate (mean, in seconds) from Criterion's output. -/// -/// Criterion writes to `target/criterion//new/estimates.json`. fn read_criterion_estimate(bench_name: &str) -> Option { let path = Path::new("target/criterion") .join(bench_name) @@ -119,7 +113,6 @@ fn read_criterion_estimate(bench_name: &str) -> Option { .join("estimates.json"); let data = std::fs::read_to_string(path).ok()?; let json: serde_json::Value = serde_json::from_str(&data).ok()?; - // Criterion stores times in nanoseconds let nanos = json.get("mean")?.get("point_estimate")?.as_f64()?; Some(nanos / 1e9) } diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index b1a1dc12f..bc7eb2242 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -6,16 +6,19 @@ use clap::Parser; use jolt_eval::agent::ClaudeCodeAgent; use jolt_eval::invariant::JoltInvariants; -use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; -use jolt_eval::objective::{perf_objective_names, Direction, Objective}; +use jolt_eval::objective::optimize::{ + auto_optimize, ObjectiveFunction, OptimizeConfig, OptimizeEnv, SingleObjective, +}; +use jolt_eval::objective::{perf_objective_names, Objective}; #[derive(Parser)] #[command(name = "optimize")] #[command(about = "AI-driven optimization of Jolt codebase objectives")] struct Cli { - /// Objectives to optimize (comma-separated). Default: all. + /// Objective to minimize (e.g. "lloc", "prover_time_fibonacci_100"). + /// Default: all measurements are taken but you must specify which to optimize. #[arg(long)] - objectives: Option, + objective: String, /// Number of optimization iterations #[arg(long, default_value = "5")] @@ -38,7 +41,6 @@ struct RealEnv { objectives: Vec, invariants: Vec, repo_dir: std::path::PathBuf, - /// Whether to include perf benchmarks in measurements. 
bench_perf: bool, } @@ -88,22 +90,6 @@ impl OptimizeEnv for RealEnv { }) } - fn directions(&self) -> HashMap { - let mut dirs: HashMap = self - .objectives - .iter() - .map(|o| (o.name().to_string(), o.direction())) - .collect(); - - if self.bench_perf { - for &name in perf_objective_names() { - dirs.insert(name.to_string(), Direction::Minimize); - } - } - - dirs - } - fn apply_diff(&mut self, diff: &str) { if let Err(e) = jolt_eval::agent::apply_diff(&self.repo_dir, diff) { tracing::warn!("Failed to apply diff: {e}"); @@ -137,43 +123,16 @@ fn main() -> eyre::Result<()> { let cli = Cli::parse(); let repo_dir = std::env::current_dir()?; - let all_objectives = Objective::all(&repo_dir); - let all_names: Vec = all_objectives - .iter() - .map(|o| o.name().to_string()) - .chain(perf_objective_names().iter().map(|s| s.to_string())) - .collect(); - - let filter_names: Option> = cli - .objectives - .as_ref() - .map(|s| s.split(',').map(|n| n.trim().to_string()).collect()); - - let bench_perf = filter_names.as_ref().is_none_or(|names| { - perf_objective_names() - .iter() - .any(|p| names.contains(&p.to_string())) - }); - - let objectives: Vec = if let Some(names) = &filter_names { - all_objectives - .into_iter() - .filter(|o| names.contains(&o.name().to_string())) - .collect() - } else { - all_objectives - }; + let objectives = Objective::all(&repo_dir); - if objectives.is_empty() && !bench_perf { - eprintln!( - "No matching objectives. 
Available: {}", - all_names.join(", ") - ); - std::process::exit(1); - } + let bench_perf = perf_objective_names().contains(&cli.objective.as_str()); let invariants = JoltInvariants::all(); + let objective_fn = SingleObjective { + name: cli.objective.clone(), + }; + let mut env = RealEnv { objectives, invariants, @@ -181,10 +140,11 @@ fn main() -> eyre::Result<()> { bench_perf, }; - println!("=== Baseline measurements ==="); + println!("=== Baseline ==="); let baseline = env.measure(); - print_measurements(&env.directions(), &baseline); - println!(); + let baseline_score = objective_fn.evaluate(&baseline); + print_measurements(&baseline); + println!("Objective: {} = {:.6}\n", cli.objective, baseline_score); let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); let config = OptimizeConfig { @@ -192,44 +152,33 @@ fn main() -> eyre::Result<()> { hint: cli.hint.clone(), }; - let result = auto_optimize(&agent, &mut env, &config, &repo_dir); + let result = auto_optimize(&agent, &mut env, &objective_fn, &config, &repo_dir); - println!("=== Optimization summary ==="); + println!("=== Summary ==="); println!( - "{}/{} iterations produced improvements.", + "{}/{} iterations improved the objective.", result .attempts .iter() - .filter(|a| a.invariants_passed - && a.measurements - .iter() - .any(|(name, &val)| { result.baseline.get(name).is_some_and(|&b| val < b) })) + .filter(|a| a.invariants_passed && a.score < result.baseline_score) .count(), result.attempts.len() ); - println!(); - println!("Final measurements:"); - print_measurements(&env.directions(), &result.best); + println!( + "Score: {:.6} -> {:.6}", + result.baseline_score, result.best_score + ); + println!("\nFinal measurements:"); + print_measurements(&result.best_measurements); Ok(()) } -fn print_measurements( - directions: &HashMap, - measurements: &HashMap, -) { - let mut names: Vec<_> = directions.keys().collect(); +fn print_measurements(measurements: &HashMap) { + let mut names: Vec<_> = 
measurements.keys().collect(); names.sort(); for name in names { - let val = measurements - .get(name) - .map(|v| format!("{v:.6}")) - .unwrap_or_else(|| "N/A".to_string()); - let dir = match directions[name] { - Direction::Minimize => "min", - Direction::Maximize => "max", - }; - println!(" {:<35} {:>15} {:>6}", name, val, dir); + println!(" {:<35} {:>15.6}", name, measurements[name]); } } diff --git a/jolt-eval/src/agent/tests.rs b/jolt-eval/src/agent/tests.rs index f5d62784a..ea9464b6f 100644 --- a/jolt-eval/src/agent/tests.rs +++ b/jolt-eval/src/agent/tests.rs @@ -8,8 +8,9 @@ use crate::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamR use crate::invariant::{ CheckError, Invariant, InvariantTargets, InvariantViolation, SynthesisTarget, }; -use crate::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; -use crate::objective::Direction; +use crate::objective::optimize::{ + auto_optimize, ObjectiveFunction, OptimizeConfig, OptimizeEnv, SingleObjective, +}; // ========================================================================= // Test invariants @@ -576,14 +577,7 @@ fn custom_harness_plugs_into_auto_redteam() { // Mock OptimizeEnv // ========================================================================= -/// Mock environment for testing the optimization loop. -/// -/// - `measurements` is a queue of measurement snapshots. Each call to -/// `measure()` pops the front. When one entry remains it repeats. -/// - `invariants_pass` controls whether invariants pass each iteration. -/// - Side effects (apply/accept/reject) are recorded for assertions. 
struct MockOptimizeEnv { - directions: HashMap, measurements: Vec>, measure_index: usize, invariants_pass: Vec, @@ -594,9 +588,8 @@ struct MockOptimizeEnv { } impl MockOptimizeEnv { - fn new(directions: HashMap) -> Self { + fn new() -> Self { Self { - directions, measurements: vec![], measure_index: 0, invariants_pass: vec![true], @@ -637,10 +630,6 @@ impl OptimizeEnv for MockOptimizeEnv { self.invariants_pass[idx] } - fn directions(&self) -> HashMap { - self.directions.clone() - } - fn apply_diff(&mut self, diff: &str) { self.applied_diffs.push(diff.to_string()); } @@ -658,8 +647,8 @@ fn m(pairs: &[(&str, f64)]) -> HashMap { pairs.iter().map(|(k, v)| (k.to_string(), *v)).collect() } -fn d(pairs: &[(&str, Direction)]) -> HashMap { - pairs.iter().map(|(k, v)| (k.to_string(), *v)).collect() +fn time_obj() -> SingleObjective { + SingleObjective { name: "time".into() } } // ========================================================================= @@ -673,25 +662,20 @@ fn optimize_accepts_improvement() { diff: Some("fake diff".into()), })]); - let mut env = - MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])).with_measurements(vec![ - m(&[("time", 10.0)]), // baseline - m(&[("time", 8.0)]), // improved - ]); - - let config = OptimizeConfig { - num_iterations: 1, - hint: None, - }; + let mut env = MockOptimizeEnv::new().with_measurements(vec![ + m(&[("time", 10.0)]), + m(&[("time", 8.0)]), + ]); - let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + let config = OptimizeConfig { num_iterations: 1, hint: None }; + let obj = time_obj(); + let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert_eq!(result.attempts.len(), 1); assert!(result.attempts[0].invariants_passed); - assert_eq!(result.best["time"], 8.0); + assert_eq!(result.best_score, 8.0); assert_eq!(env.accepted, vec![1]); assert_eq!(env.rejected, 0); - assert_eq!(env.applied_diffs.len(), 1); } #[test] @@ -701,23 +685,17 @@ fn 
optimize_rejects_regression() { diff: Some("bad diff".into()), })]); - let mut env = - MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])).with_measurements(vec![ - m(&[("time", 10.0)]), // baseline - m(&[("time", 12.0)]), // regression - ]); - - let config = OptimizeConfig { - num_iterations: 1, - hint: None, - }; + let mut env = MockOptimizeEnv::new().with_measurements(vec![ + m(&[("time", 10.0)]), + m(&[("time", 12.0)]), + ]); - let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + let config = OptimizeConfig { num_iterations: 1, hint: None }; + let obj = time_obj(); + let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert_eq!(result.attempts.len(), 1); - assert!(result.attempts[0].invariants_passed); - // Best stays at baseline because regression was rejected - assert_eq!(result.best["time"], 10.0); + assert_eq!(result.best_score, 10.0); assert!(env.accepted.is_empty()); assert_eq!(env.rejected, 1); } @@ -729,72 +707,50 @@ fn optimize_rejects_when_invariants_fail() { diff: Some("breaking diff".into()), })]); - let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + let mut env = MockOptimizeEnv::new() .with_measurements(vec![ - m(&[("time", 10.0)]), // baseline - m(&[("time", 5.0)]), // looks improved but invariants fail + m(&[("time", 10.0)]), + m(&[("time", 5.0)]), ]) .with_invariants(vec![false]); - let config = OptimizeConfig { - num_iterations: 1, - hint: None, - }; - - let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + let config = OptimizeConfig { num_iterations: 1, hint: None }; + let obj = time_obj(); + let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); - assert_eq!(result.attempts.len(), 1); assert!(!result.attempts[0].invariants_passed); - assert_eq!(result.best["time"], 10.0); // rejected despite improvement + assert_eq!(result.best_score, 10.0); assert!(env.accepted.is_empty()); - assert_eq!(env.rejected, 1); // 
rejected because invariants failed + assert_eq!(env.rejected, 1); } #[test] -fn optimize_maximize_direction() { - let agent = MockAgent::from_responses(vec![Ok(AgentResponse { - text: "more inlines".into(), - diff: Some("diff".into()), - })]); - - let mut env = - MockOptimizeEnv::new(d(&[("inlines", Direction::Maximize)])).with_measurements(vec![ - m(&[("inlines", 100.0)]), // baseline - m(&[("inlines", 150.0)]), // improvement (higher is better) - ]); - - let config = OptimizeConfig { - num_iterations: 1, - hint: None, - }; - - let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); - - assert_eq!(result.best["inlines"], 150.0); - assert_eq!(env.accepted, vec![1]); -} +fn optimize_custom_objective_function() { + // Weighted sum: 2*time + size. Agent improves time but regresses size. + struct WeightedSum; + impl ObjectiveFunction for WeightedSum { + fn description(&self) -> String { "Minimize 2*time + size".into() } + fn evaluate(&self, m: &HashMap) -> f64 { + 2.0 * m.get("time").unwrap_or(&0.0) + m.get("size").unwrap_or(&0.0) + } + } -#[test] -fn optimize_maximize_rejects_decrease() { let agent = MockAgent::from_responses(vec![Ok(AgentResponse { - text: "oops".into(), + text: "optimized".into(), diff: Some("diff".into()), })]); - let mut env = - MockOptimizeEnv::new(d(&[("inlines", Direction::Maximize)])).with_measurements(vec![ - m(&[("inlines", 100.0)]), - m(&[("inlines", 80.0)]), // regression for Maximize - ]); - - let config = OptimizeConfig { - num_iterations: 1, - hint: None, - }; + let mut env = MockOptimizeEnv::new().with_measurements(vec![ + m(&[("time", 10.0), ("size", 100.0)]), // score = 120 + m(&[("time", 8.0), ("size", 110.0)]), // score = 126 (regression!) 
+ ]); - let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + let config = OptimizeConfig { num_iterations: 1, hint: None }; + let obj = WeightedSum; + let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); - assert_eq!(result.best["inlines"], 100.0); + // Rejected because 126 > 120 + assert_eq!(result.best_score, 120.0); assert!(env.accepted.is_empty()); assert_eq!(env.rejected, 1); } @@ -802,87 +758,58 @@ fn optimize_maximize_rejects_decrease() { #[test] fn optimize_multi_iteration_progressive_improvement() { let agent = MockAgent::from_responses(vec![ - Ok(AgentResponse { - text: "iter 1".into(), - diff: Some("diff1".into()), - }), - Ok(AgentResponse { - text: "iter 2".into(), - diff: Some("diff2".into()), - }), - Ok(AgentResponse { - text: "iter 3".into(), - diff: Some("diff3".into()), - }), + Ok(AgentResponse { text: "iter 1".into(), diff: Some("diff1".into()) }), + Ok(AgentResponse { text: "iter 2".into(), diff: Some("diff2".into()) }), + Ok(AgentResponse { text: "iter 3".into(), diff: Some("diff3".into()) }), ]); - let mut env = - MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])).with_measurements(vec![ - m(&[("time", 10.0)]), // baseline - m(&[("time", 8.0)]), // iter 1: improvement - m(&[("time", 9.0)]), // iter 2: regression from 8.0 - m(&[("time", 6.0)]), // iter 3: improvement from 8.0 - ]); - - let config = OptimizeConfig { - num_iterations: 3, - hint: None, - }; + let mut env = MockOptimizeEnv::new().with_measurements(vec![ + m(&[("time", 10.0)]), + m(&[("time", 8.0)]), + m(&[("time", 9.0)]), + m(&[("time", 6.0)]), + ]); - let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + let config = OptimizeConfig { num_iterations: 3, hint: None }; + let obj = time_obj(); + let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert_eq!(result.attempts.len(), 3); - assert_eq!(result.best["time"], 6.0); - assert_eq!(env.accepted, vec![1, 3]); // iters 1 and 3 
accepted - assert_eq!(env.rejected, 1); // iter 2 rejected + assert_eq!(result.best_score, 6.0); + assert_eq!(env.accepted, vec![1, 3]); + assert_eq!(env.rejected, 1); } #[test] fn optimize_stops_when_agent_produces_no_diff() { let agent = MockAgent::from_responses(vec![ - Ok(AgentResponse { - text: "I made a change".into(), - diff: Some("diff1".into()), - }), - Ok(AgentResponse { - text: "I couldn't find anything else".into(), - diff: None, // no diff -> should stop - }), + Ok(AgentResponse { text: "changed".into(), diff: Some("diff1".into()) }), + Ok(AgentResponse { text: "nothing else".into(), diff: None }), ]); - let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + let mut env = MockOptimizeEnv::new() .with_measurements(vec![m(&[("time", 10.0)]), m(&[("time", 9.0)])]); - let config = OptimizeConfig { - num_iterations: 5, - hint: None, - }; - - let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + let config = OptimizeConfig { num_iterations: 5, hint: None }; + let obj = time_obj(); + let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); - // Only 1 attempt recorded (second iteration stopped before measurement) assert_eq!(result.attempts.len(), 1); } #[test] fn optimize_stops_when_agent_errors() { let agent = MockAgent::from_responses(vec![ - Ok(AgentResponse { - text: "change 1".into(), - diff: Some("diff".into()), - }), + Ok(AgentResponse { text: "change".into(), diff: Some("diff".into()) }), Err(AgentError::new("agent crashed")), ]); - let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + let mut env = MockOptimizeEnv::new() .with_measurements(vec![m(&[("time", 10.0)]), m(&[("time", 10.0)])]); - let config = OptimizeConfig { - num_iterations: 5, - hint: None, - }; - - let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + let config = OptimizeConfig { num_iterations: 5, hint: None }; + let obj = time_obj(); + let result = auto_optimize(&agent, &mut 
env, &obj, &config, Path::new("/tmp")); assert_eq!(result.attempts.len(), 1); } @@ -890,52 +817,19 @@ fn optimize_stops_when_agent_errors() { #[test] fn optimize_zero_iterations() { let agent = MockAgent::always_ok("should not be called"); - let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + let mut env = MockOptimizeEnv::new() .with_measurements(vec![m(&[("time", 10.0)])]); - let config = OptimizeConfig { - num_iterations: 0, - hint: None, - }; - - let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + let config = OptimizeConfig { num_iterations: 0, hint: None }; + let obj = time_obj(); + let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert!(result.attempts.is_empty()); - assert_eq!(result.baseline["time"], 10.0); - assert_eq!(result.best["time"], 10.0); + assert_eq!(result.baseline_score, 10.0); + assert_eq!(result.best_score, 10.0); assert!(agent.recorded_prompts().is_empty()); } -#[test] -fn optimize_multiple_objectives() { - let agent = MockAgent::from_responses(vec![Ok(AgentResponse { - text: "optimized".into(), - diff: Some("diff".into()), - })]); - - // time improves, size regresses, but any improvement triggers accept - let mut env = MockOptimizeEnv::new(d(&[ - ("time", Direction::Minimize), - ("size", Direction::Minimize), - ])) - .with_measurements(vec![ - m(&[("time", 10.0), ("size", 100.0)]), - m(&[("time", 8.0), ("size", 110.0)]), - ]); - - let config = OptimizeConfig { - num_iterations: 1, - hint: None, - }; - - let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); - - // Accepted because time improved (even though size regressed) - assert_eq!(env.accepted, vec![1]); - assert_eq!(result.best["time"], 8.0); - assert_eq!(result.best["size"], 110.0); -} - #[test] fn optimize_prompt_includes_measurements_and_hint() { let agent = MockAgent::from_responses(vec![Ok(AgentResponse { @@ -943,55 +837,42 @@ fn optimize_prompt_includes_measurements_and_hint() { 
diff: Some("diff".into()), })]); - let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + let mut env = MockOptimizeEnv::new() .with_measurements(vec![m(&[("time", 42.0)]), m(&[("time", 42.0)])]); let config = OptimizeConfig { num_iterations: 1, hint: Some("Focus on the inner loop".into()), }; - - auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + let obj = time_obj(); + auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); let prompts = agent.recorded_prompts(); assert_eq!(prompts.len(), 1); assert!(prompts[0].contains("42.0")); - assert!(prompts[0].contains("lower is better")); assert!(prompts[0].contains("Focus on the inner loop")); } #[test] fn optimize_prompt_includes_past_attempts() { let agent = MockAgent::from_responses(vec![ - Ok(AgentResponse { - text: "attempt 1".into(), - diff: Some("d1".into()), - }), - Ok(AgentResponse { - text: "attempt 2".into(), - diff: Some("d2".into()), - }), + Ok(AgentResponse { text: "attempt 1".into(), diff: Some("d1".into()) }), + Ok(AgentResponse { text: "attempt 2".into(), diff: Some("d2".into()) }), ]); - let mut env = - MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])).with_measurements(vec![ - m(&[("time", 10.0)]), - m(&[("time", 10.0)]), // no improvement - m(&[("time", 10.0)]), - ]); - - let config = OptimizeConfig { - num_iterations: 2, - hint: None, - }; + let mut env = MockOptimizeEnv::new().with_measurements(vec![ + m(&[("time", 10.0)]), + m(&[("time", 10.0)]), + m(&[("time", 10.0)]), + ]); - auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + let config = OptimizeConfig { num_iterations: 2, hint: None }; + let obj = time_obj(); + auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); let prompts = agent.recorded_prompts(); assert_eq!(prompts.len(), 2); - // First prompt: no past attempts assert!(!prompts[0].contains("Previous attempts")); - // Second prompt: includes iteration 1's results assert!(prompts[1].contains("Previous attempts")); 
assert!(prompts[1].contains("iteration 1")); } @@ -1003,15 +884,12 @@ fn optimize_diff_is_applied() { diff: Some("--- a/x\n+++ b/x\n".into()), })]); - let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + let mut env = MockOptimizeEnv::new() .with_measurements(vec![m(&[("time", 10.0)]), m(&[("time", 10.0)])]); - let config = OptimizeConfig { - num_iterations: 1, - hint: None, - }; - - auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + let config = OptimizeConfig { num_iterations: 1, hint: None }; + let obj = time_obj(); + auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert_eq!(env.applied_diffs.len(), 1); assert!(env.applied_diffs[0].contains("--- a/x")); @@ -1019,44 +897,31 @@ fn optimize_diff_is_applied() { #[test] fn optimize_invariant_failure_mid_sequence() { - // 3 iterations: improve, invariant fail, improve let agent = MockAgent::from_responses(vec![ - Ok(AgentResponse { - text: "i1".into(), - diff: Some("d1".into()), - }), - Ok(AgentResponse { - text: "i2".into(), - diff: Some("d2".into()), - }), - Ok(AgentResponse { - text: "i3".into(), - diff: Some("d3".into()), - }), + Ok(AgentResponse { text: "i1".into(), diff: Some("d1".into()) }), + Ok(AgentResponse { text: "i2".into(), diff: Some("d2".into()) }), + Ok(AgentResponse { text: "i3".into(), diff: Some("d3".into()) }), ]); - let mut env = MockOptimizeEnv::new(d(&[("time", Direction::Minimize)])) + let mut env = MockOptimizeEnv::new() .with_measurements(vec![ - m(&[("time", 10.0)]), // baseline - m(&[("time", 8.0)]), // iter 1: improvement - m(&[("time", 5.0)]), // iter 2: would be improvement but invariants fail - m(&[("time", 7.0)]), // iter 3: improvement + m(&[("time", 10.0)]), + m(&[("time", 8.0)]), + m(&[("time", 5.0)]), + m(&[("time", 7.0)]), ]) .with_invariants(vec![true, false, true]); - let config = OptimizeConfig { - num_iterations: 3, - hint: None, - }; - - let result = auto_optimize(&agent, &mut env, &config, Path::new("/tmp")); + let config 
= OptimizeConfig { num_iterations: 3, hint: None }; + let obj = time_obj(); + let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert_eq!(result.attempts.len(), 3); assert!(result.attempts[0].invariants_passed); assert!(!result.attempts[1].invariants_passed); assert!(result.attempts[2].invariants_passed); assert_eq!(env.accepted, vec![1, 3]); - assert_eq!(env.rejected, 1); // iter 2 rejected (invariant failure) - assert_eq!(result.best["time"], 7.0); + assert_eq!(env.rejected, 1); + assert_eq!(result.best_score, 7.0); } diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index 8b911a239..1006d26e1 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -18,7 +18,7 @@ pub use guests::{GuestConfig, GuestProgram, JoltDevice, ProofVerifyError}; pub use invariant::{ CheckError, Invariant, InvariantTargets, InvariantViolation, JoltInvariants, SynthesisTarget, }; -pub use objective::{AbstractObjective, Direction, MeasurementError, Objective, PerfObjective}; +pub use objective::{AbstractObjective, MeasurementError, Objective, PerfObjective}; // Re-exports used by the #[invariant] proc macro generated code. pub use arbitrary; diff --git a/jolt-eval/src/objective/code_quality/cognitive.rs b/jolt-eval/src/objective/code_quality/cognitive.rs index 8c4680798..5c72aeafb 100644 --- a/jolt-eval/src/objective/code_quality/cognitive.rs +++ b/jolt-eval/src/objective/code_quality/cognitive.rs @@ -3,7 +3,7 @@ use std::path::{Path, PathBuf}; use rust_code_analysis::FuncSpace; use super::lloc::{analyze_rust_file, rust_files}; -use crate::objective::{AbstractObjective, Direction, MeasurementError}; +use crate::objective::{AbstractObjective, MeasurementError}; /// Average cognitive complexity per function across all Rust files under /// `jolt-core/src/`. 
@@ -43,9 +43,6 @@ impl AbstractObjective for CognitiveComplexityObjective { Ok(total / count as f64) } - fn direction(&self) -> Direction { - Direction::Minimize - } } /// Walk the function-space tree and collect cognitive complexity from diff --git a/jolt-eval/src/objective/code_quality/halstead_bugs.rs b/jolt-eval/src/objective/code_quality/halstead_bugs.rs index 379a4de33..4c24b5a0e 100644 --- a/jolt-eval/src/objective/code_quality/halstead_bugs.rs +++ b/jolt-eval/src/objective/code_quality/halstead_bugs.rs @@ -3,7 +3,7 @@ use std::path::{Path, PathBuf}; use rust_code_analysis::FuncSpace; use super::lloc::{analyze_rust_file, rust_files}; -use crate::objective::{AbstractObjective, Direction, MeasurementError}; +use crate::objective::{AbstractObjective, MeasurementError}; /// Estimated number of delivered bugs across all Rust files under /// `jolt-core/src/`, based on Halstead's bug prediction formula @@ -38,9 +38,6 @@ impl AbstractObjective for HalsteadBugsObjective { Ok(total) } - fn direction(&self) -> Direction { - Direction::Minimize - } } /// Sum Halstead bugs across all function spaces in the tree, diff --git a/jolt-eval/src/objective/code_quality/lloc.rs b/jolt-eval/src/objective/code_quality/lloc.rs index ce1d4718f..77515aa5b 100644 --- a/jolt-eval/src/objective/code_quality/lloc.rs +++ b/jolt-eval/src/objective/code_quality/lloc.rs @@ -2,7 +2,7 @@ use std::path::{Path, PathBuf}; use rust_code_analysis::{get_function_spaces, FuncSpace, LANG}; -use crate::objective::{AbstractObjective, Direction, MeasurementError}; +use crate::objective::{AbstractObjective, MeasurementError}; /// Total logical lines of code (LLOC) across all Rust files under /// `jolt-core/src/`. 
@@ -34,10 +34,6 @@ impl AbstractObjective for LlocObjective { Ok(total) } - fn direction(&self) -> Direction { - Direction::Minimize - } - fn units(&self) -> Option<&str> { Some("lines") } diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index fefd62e6a..25e00e708 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -6,13 +6,6 @@ pub mod synthesis; use std::fmt; use std::path::Path; -/// Whether lower or higher values are better. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Direction { - Minimize, - Maximize, -} - /// Error during objective measurement. #[derive(Debug, Clone)] pub struct MeasurementError { @@ -39,7 +32,6 @@ impl MeasurementError { pub trait AbstractObjective: Send + Sync { fn name(&self) -> &str; fn collect_measurement(&self) -> Result; - fn direction(&self) -> Direction; fn units(&self) -> Option<&str> { None } @@ -112,14 +104,6 @@ impl Objective { Self::HalsteadBugs(o) => o.units(), } } - - pub fn direction(&self) -> Direction { - match self { - Self::Lloc(o) => o.direction(), - Self::CognitiveComplexity(o) => o.direction(), - Self::HalsteadBugs(o) => o.direction(), - } - } } /// Names of all registered `PerfObjective` benchmarks. @@ -130,13 +114,6 @@ pub fn perf_objective_names() -> &'static [&'static str] { ] } -/// Record of a single optimization attempt for post-hoc analysis. 
-pub struct OptimizationAttempt { - pub description: String, - pub diff: String, - pub measurements: std::collections::HashMap, - pub invariants_passed: bool, -} #[cfg(test)] mod tests { @@ -145,13 +122,11 @@ mod tests { struct ConstantObjective { label: &'static str, value: f64, - direction: Direction, } impl AbstractObjective for ConstantObjective { fn name(&self) -> &str { self.label } fn collect_measurement(&self) -> Result { Ok(self.value) } - fn direction(&self) -> Direction { self.direction } } #[test] @@ -159,11 +134,9 @@ mod tests { let obj = ConstantObjective { label: "latency", value: 42.0, - direction: Direction::Minimize, }; assert_eq!(obj.name(), "latency"); assert_eq!(obj.collect_measurement().unwrap(), 42.0); - assert_eq!(obj.direction(), Direction::Minimize); } #[test] diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index ffc950743..43ce3fee6 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -2,7 +2,35 @@ use std::collections::HashMap; use std::path::Path; use crate::agent::{truncate, AgentHarness, DiffScope}; -use crate::objective::{Direction, OptimizationAttempt}; + +/// A function that combines raw objective measurements into a single +/// scalar value to minimize. +/// +/// The optimizer always minimizes. To maximize something, negate it +/// in your implementation. +pub trait ObjectiveFunction: Send + Sync { + /// Human-readable description of what this function optimizes, + /// included in the agent prompt. + fn description(&self) -> String; + + /// Combine raw measurements into a single scalar to minimize. + fn evaluate(&self, measurements: &HashMap) -> f64; +} + +/// A simple objective function that returns a single named measurement. 
+pub struct SingleObjective { + pub name: String, +} + +impl ObjectiveFunction for SingleObjective { + fn description(&self) -> String { + format!("Minimize {}", self.name) + } + + fn evaluate(&self, measurements: &HashMap) -> f64 { + measurements.get(&self.name).copied().unwrap_or(f64::INFINITY) + } +} /// Configuration for an optimization run. pub struct OptimizeConfig { @@ -22,56 +50,63 @@ impl Default for OptimizeConfig { /// Result of a complete optimization run. pub struct OptimizeResult { pub attempts: Vec, - pub baseline: HashMap, - pub best: HashMap, + pub baseline_score: f64, + pub best_score: f64, + pub best_measurements: HashMap, +} + +/// Record of a single optimization attempt. +pub struct OptimizationAttempt { + pub description: String, + pub diff: String, + pub measurements: HashMap, + pub score: f64, + pub invariants_passed: bool, } /// Environment trait that decouples the optimization loop from side effects. -/// -/// The real binary implements this with actual measurement, invariant -/// checking, and git operations. Tests supply a mock implementation. pub trait OptimizeEnv { - /// Measure all objectives. Returns name -> value. + /// Measure all raw objectives. Returns name -> value. fn measure(&mut self) -> HashMap; - /// Check all invariants. Returns `true` if they all pass. + /// Check all invariants. Returns `true` if they all pass. fn check_invariants(&mut self) -> bool; - /// Return the direction for each objective (name -> direction). - fn directions(&self) -> HashMap; - /// Apply an agent-produced diff to the working tree. fn apply_diff(&mut self, diff: &str); - /// Called when a change is accepted (measurements improved, invariants passed). + /// Called when a change is accepted. fn accept(&mut self, iteration: usize); - /// Called when a change is rejected (no improvement, or invariants failed). + /// Called when a change is rejected. fn reject(&mut self); } /// Run an AI-driven optimization loop. /// -/// Each iteration: -/// 1. 
Builds a prompt from objective directions, current best measurements, -/// past attempts, and an optional hint. -/// 2. Invokes the agent via [`AgentHarness`]. -/// 3. If the agent produced a diff, applies it via [`OptimizeEnv::apply_diff`]. -/// 4. Re-measures objectives and checks invariants. -/// 5. Accepts or rejects the change. +/// The agent tries to minimize `objective.evaluate(measurements)`. +/// Each iteration: invoke agent, apply diff, re-measure, accept/reject. pub fn auto_optimize( agent: &A, env: &mut E, + objective: &dyn ObjectiveFunction, config: &OptimizeConfig, repo_dir: &Path, ) -> OptimizeResult { - let directions = env.directions(); let baseline = env.measure(); - let mut best = baseline.clone(); + let baseline_score = objective.evaluate(&baseline); + let mut best_score = baseline_score; + let mut best_measurements = baseline.clone(); let mut attempts = Vec::new(); for iteration in 0..config.num_iterations { - let prompt = build_optimize_prompt(&directions, &best, &attempts, config.hint.as_deref()); + let prompt = build_optimize_prompt( + objective, + best_score, + &best_measurements, + &attempts, + config.hint.as_deref(), + ); let diff_scope = DiffScope::Exclude(vec!["jolt-eval/".into()]); let response = match agent.invoke(repo_dir, &prompt, &diff_scope) { @@ -94,38 +129,27 @@ pub fn auto_optimize( }; let new_measurements = env.measure(); + let new_score = objective.evaluate(&new_measurements); let invariants_passed = env.check_invariants(); if !invariants_passed { env.reject(); } - let improved = if invariants_passed { - directions.iter().any(|(name, dir)| { - let old = best.get(name); - let new = new_measurements.get(name); - match (old, new) { - (Some(&o), Some(&n)) => match dir { - Direction::Minimize => n < o, - Direction::Maximize => n > o, - }, - _ => false, - } - }) - } else { - false - }; + let improved = invariants_passed && new_score < best_score; let attempt = OptimizationAttempt { description: format!("iteration {}", iteration + 
1), diff: truncate(&diff_text, 5000).to_string(), measurements: new_measurements.clone(), + score: new_score, invariants_passed, }; attempts.push(attempt); if improved { - best = new_measurements; + best_score = new_score; + best_measurements = new_measurements; env.accept(iteration + 1); } else if invariants_passed { env.reject(); @@ -134,14 +158,16 @@ pub fn auto_optimize( OptimizeResult { attempts, - baseline, - best, + baseline_score, + best_score, + best_measurements, } } fn build_optimize_prompt( - directions: &HashMap, - current_best: &HashMap, + objective: &dyn ObjectiveFunction, + current_best_score: f64, + current_best_measurements: &HashMap, past_attempts: &[OptimizationAttempt], hint: Option<&str>, ) -> String { @@ -149,24 +175,21 @@ fn build_optimize_prompt( prompt.push_str( "You are an expert performance engineer optimizing a zkVM (Jolt). \ - Your goal is to make code changes that improve the following objectives.\n\n", + Your goal is to make code changes that MINIMIZE the objective function.\n\n", ); - prompt.push_str("## Objectives to optimize\n\n"); - let mut names: Vec<_> = directions.keys().collect(); + prompt.push_str("## Objective function\n\n"); + prompt.push_str(&objective.description()); + prompt.push_str(&format!( + "\n\nCurrent best score: {current_best_score:.6}\n\n" + )); + + prompt.push_str("## Current measurements\n\n"); + let mut names: Vec<_> = current_best_measurements.keys().collect(); names.sort(); for name in &names { - let dir = match directions[*name] { - Direction::Minimize => "lower is better", - Direction::Maximize => "higher is better", - }; - let current = current_best - .get(*name) - .map(|v| format!("{v:.4}")) - .unwrap_or_else(|| "unknown".to_string()); - prompt.push_str(&format!( - "- **{name}**: current = {current}, direction = {dir}\n", - )); + let val = current_best_measurements[*name]; + prompt.push_str(&format!("- **{name}**: {val:.6}\n")); } prompt.push('\n'); @@ -174,12 +197,12 @@ fn build_optimize_prompt( "## 
Instructions\n\n\ 1. Read the relevant source code (especially `jolt-core/src/`) to understand \ hot paths and potential optimization opportunities.\n\ - 2. Make targeted code changes that you believe will improve the objectives.\n\ + 2. Make targeted code changes that you believe will reduce the objective function.\n\ 3. Focus on changes to `jolt-core/` -- do NOT modify `jolt-eval/`.\n\ 4. Prefer changes that are safe, correct, and unlikely to break invariants.\n\ 5. Run `cargo clippy -p jolt-core --features host --message-format=short -q` \ to verify your changes compile.\n\ - 6. Summarize what you changed and why you expect it to improve the objectives.\n\n", + 6. Summarize what you changed and why you expect improvement.\n\n", ); if let Some(h) = hint { @@ -196,12 +219,15 @@ fn build_optimize_prompt( } else { "INVARIANTS FAILED" }; - prompt.push_str(&format!("- **{}** ({}): ", attempt.description, status)); + prompt.push_str(&format!( + "- **{}** ({}, score={:.6}): ", + attempt.description, status, attempt.score + )); let mut keys: Vec<_> = attempt.measurements.keys().collect(); keys.sort(); for name in keys { let val = attempt.measurements[name]; - prompt.push_str(&format!("{name}={val:.4} ")); + prompt.push_str(&format!("{name}={val:.6} ")); } prompt.push('\n'); } @@ -212,7 +238,7 @@ fn build_optimize_prompt( "## Output\n\n\ Make your code changes directly. 
After you're done, summarize:\n\ - What you changed\n\ - - Why you expect improvement\n\ + - Why you expect the objective function to decrease\n\ - Any risks or trade-offs\n", ); From 9c6c756b2608376076bfc8c510294793faed729e Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 20:22:49 -0400 Subject: [PATCH 52/86] refactor(jolt-eval): unify objective traits, type-safe ObjectiveFunction struct - Unify AbstractObjective + PerfObjective into single Objective trait with collect_measurement() and run() as separate default-impl methods - Replace Objective enum with three enums: StaticAnalysisObjective, PerformanceObjective, and OptimizationObjective (union of both) - Discriminant-based Hash/Eq on enums for use as HashMap keys - Replace ObjectiveFunction trait with const-constructible struct containing name, inputs (&'static [OptimizationObjective]), and evaluate fn pointer - Declare concrete ObjectiveFunctions as consts (MINIMIZE_LLOC, etc.) with registry via ObjectiveFunction::all() and by_name() - Use &'static str instead of PathBuf in objective structs to enable const construction; resolve to PathBuf at measurement time - Add --list flag to optimize binary for discovering objective functions - Remove description() from ObjectiveFunction; prompt references the implementation source directly Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/measure_objectives.rs | 49 +-- jolt-eval/bin/optimize.rs | 94 +++-- jolt-eval/src/agent/tests.rs | 387 ++++++++++++------ jolt-eval/src/lib.rs | 6 +- .../src/objective/code_quality/cognitive.rs | 41 +- .../objective/code_quality/halstead_bugs.rs | 39 +- jolt-eval/src/objective/code_quality/lloc.rs | 27 +- jolt-eval/src/objective/mod.rs | 239 +++++++++-- jolt-eval/src/objective/objective_fn/mod.rs | 124 ++++++ jolt-eval/src/objective/optimize.rs | 85 ++-- .../src/objective/performance/binding.rs | 31 +- .../src/objective/performance/prover_time.rs | 8 +- jolt-eval/src/objective/synthesis.rs | 6 +- 13 files 
changed, 788 insertions(+), 348 deletions(-) create mode 100644 jolt-eval/src/objective/objective_fn/mod.rs diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs index f85e61086..9e32ce862 100644 --- a/jolt-eval/bin/measure_objectives.rs +++ b/jolt-eval/bin/measure_objectives.rs @@ -2,7 +2,7 @@ use std::path::Path; use clap::Parser; -use jolt_eval::objective::{perf_objective_names, Objective}; +use jolt_eval::objective::{PerformanceObjective, StaticAnalysisObjective}; #[derive(Parser)] #[command(name = "measure-objectives")] @@ -18,10 +18,7 @@ struct Cli { } fn print_header() { - println!( - "{:<35} {:>15} {:>8}", - "Objective", "Value", "Units" - ); + println!("{:<35} {:>15} {:>8}", "Objective", "Value", "Units"); println!("{}", "-".repeat(60)); } @@ -37,23 +34,31 @@ fn main() -> eyre::Result<()> { // Performance objectives (from Criterion) if !cli.no_bench { - let perf_names = perf_objective_names(); + let perf = PerformanceObjective::all(); let run_bench = cli .objective .as_ref() - .is_none_or(|name| perf_names.contains(&name.as_str())); + .is_none_or(|name| perf.iter().any(|p| p.name() == name.as_str())); if run_bench { eprintln!("Running Criterion benchmarks..."); let mut any_succeeded = false; - for &name in perf_names { + for p in &perf { if let Some(ref filter) = cli.objective { - if name != filter.as_str() { + if p.name() != filter.as_str() { continue; } } let status = std::process::Command::new("cargo") - .args(["bench", "-p", "jolt-eval", "--bench", name, "--", "--quick"]) + .args([ + "bench", + "-p", + "jolt-eval", + "--bench", + p.name(), + "--", + "--quick", + ]) .status(); if matches!(status, Ok(s) if s.success()) { any_succeeded = true; @@ -63,16 +68,16 @@ fn main() -> eyre::Result<()> { if any_succeeded { println!(); print_header(); - for &name in perf_names { + for p in &perf { if let Some(ref filter) = cli.objective { - if name != filter.as_str() { + if p.name() != filter.as_str() { continue; } } - match 
read_criterion_estimate(name) { - Some(secs) => print_row(name, secs, "s"), + match read_criterion_estimate(p.name()) { + Some(secs) => print_row(p.name(), secs, "s"), None => { - println!("{:<35} {:>15}", name, "NO DATA"); + println!("{:<35} {:>15}", p.name(), "NO DATA"); } } } @@ -84,20 +89,19 @@ fn main() -> eyre::Result<()> { } // Static-analysis objectives - let objectives = Objective::all(&repo_root); - for obj in &objectives { + for sa in StaticAnalysisObjective::all(&repo_root) { if let Some(ref name) = cli.objective { - if obj.name() != name.as_str() { + if sa.name() != name.as_str() { continue; } } - match obj.collect_measurement() { + match sa.collect_measurement() { Ok(val) => { - let units = obj.units().unwrap_or("-"); - print_row(obj.name(), val, units); + let units = sa.units().unwrap_or("-"); + print_row(sa.name(), val, units); } Err(e) => { - println!("{:<35} {:>15}", obj.name(), format!("ERROR: {e}")); + println!("{:<35} {:>15}", sa.name(), format!("ERROR: {e}")); } } } @@ -105,7 +109,6 @@ fn main() -> eyre::Result<()> { Ok(()) } -/// Read the point estimate (mean, in seconds) from Criterion's output. 
fn read_criterion_estimate(bench_name: &str) -> Option { let path = Path::new("target/criterion") .join(bench_name) diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index bc7eb2242..4591e9002 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -6,19 +6,22 @@ use clap::Parser; use jolt_eval::agent::ClaudeCodeAgent; use jolt_eval::invariant::JoltInvariants; -use jolt_eval::objective::optimize::{ - auto_optimize, ObjectiveFunction, OptimizeConfig, OptimizeEnv, SingleObjective, -}; -use jolt_eval::objective::{perf_objective_names, Objective}; +use jolt_eval::objective::objective_fn::ObjectiveFunction; +use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; +use jolt_eval::objective::{OptimizationObjective, PerformanceObjective, StaticAnalysisObjective}; #[derive(Parser)] #[command(name = "optimize")] #[command(about = "AI-driven optimization of Jolt codebase objectives")] struct Cli { - /// Objective to minimize (e.g. "lloc", "prover_time_fibonacci_100"). - /// Default: all measurements are taken but you must specify which to optimize. + /// Objective function to minimize. + /// Run with --list to see available functions. #[arg(long)] - objective: String, + objective: Option, + + /// List all available objective functions and exit. 
+ #[arg(long)] + list: bool, /// Number of optimization iterations #[arg(long, default_value = "5")] @@ -38,25 +41,23 @@ struct Cli { } struct RealEnv { - objectives: Vec, - invariants: Vec, repo_dir: std::path::PathBuf, + invariants: Vec, bench_perf: bool, } impl OptimizeEnv for RealEnv { - fn measure(&mut self) -> HashMap { - let mut results: HashMap = self - .objectives - .iter() - .filter_map(|o| { - let name = o.name().to_string(); - o.collect_measurement().ok().map(|v| (name, v)) - }) - .collect(); + fn measure(&mut self) -> HashMap { + let mut results = HashMap::new(); + + for sa in StaticAnalysisObjective::all(&self.repo_dir) { + if let Ok(v) = sa.collect_measurement() { + results.insert(OptimizationObjective::StaticAnalysis(sa), v); + } + } if self.bench_perf { - for &name in perf_objective_names() { + for p in PerformanceObjective::all() { let status = Command::new("cargo") .current_dir(&self.repo_dir) .args([ @@ -64,7 +65,7 @@ impl OptimizeEnv for RealEnv { "-p", "jolt-eval", "--bench", - name, + p.name(), "--", "--quick", "--save-baseline", @@ -73,8 +74,8 @@ impl OptimizeEnv for RealEnv { .status(); if matches!(status, Ok(s) if s.success()) { - if let Some(secs) = read_criterion_estimate(name) { - results.insert(name.to_string(), secs); + if let Some(secs) = read_criterion_estimate(p.name()) { + results.insert(OptimizationObjective::Performance(p), secs); } } } @@ -122,29 +123,44 @@ fn main() -> eyre::Result<()> { tracing_subscriber::fmt::init(); let cli = Cli::parse(); - let repo_dir = std::env::current_dir()?; - let objectives = Objective::all(&repo_dir); + if cli.list { + println!("Available objective functions:\n"); + for f in ObjectiveFunction::all() { + let inputs: Vec<_> = f.inputs.iter().map(|i| i.name().to_string()).collect(); + println!(" {:<35} inputs: {}", f.name, inputs.join(", ")); + } + return Ok(()); + } - let bench_perf = perf_objective_names().contains(&cli.objective.as_str()); + let objective_name = cli + .objective + .as_deref() + 
.expect("--objective is required (use --list to see options)"); - let invariants = JoltInvariants::all(); + let objective = ObjectiveFunction::by_name(objective_name).unwrap_or_else(|| { + eprintln!("Unknown objective function: {objective_name}"); + eprintln!("Available:"); + for f in ObjectiveFunction::all() { + eprintln!(" {}", f.name); + } + std::process::exit(1); + }); - let objective_fn = SingleObjective { - name: cli.objective.clone(), - }; + let repo_dir = std::env::current_dir()?; + let bench_perf = objective.inputs.iter().any(|i| i.is_perf()); + let invariants = JoltInvariants::all(); let mut env = RealEnv { - objectives, - invariants, repo_dir: repo_dir.clone(), + invariants, bench_perf, }; println!("=== Baseline ==="); let baseline = env.measure(); - let baseline_score = objective_fn.evaluate(&baseline); + let baseline_score = (objective.evaluate)(&baseline); print_measurements(&baseline); - println!("Objective: {} = {:.6}\n", cli.objective, baseline_score); + println!("Objective: {} = {:.6}\n", objective.name, baseline_score); let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); let config = OptimizeConfig { @@ -152,7 +168,7 @@ fn main() -> eyre::Result<()> { hint: cli.hint.clone(), }; - let result = auto_optimize(&agent, &mut env, &objective_fn, &config, &repo_dir); + let result = auto_optimize(&agent, &mut env, objective, &config, &repo_dir); println!("=== Summary ==="); println!( @@ -174,11 +190,11 @@ fn main() -> eyre::Result<()> { Ok(()) } -fn print_measurements(measurements: &HashMap) { - let mut names: Vec<_> = measurements.keys().collect(); - names.sort(); - for name in names { - println!(" {:<35} {:>15.6}", name, measurements[name]); +fn print_measurements(measurements: &HashMap) { + let mut entries: Vec<_> = measurements.iter().collect(); + entries.sort_by_key(|(k, _)| k.name()); + for (key, val) in entries { + println!(" {:<35} {:>15.6}", key.name(), val); } } diff --git a/jolt-eval/src/agent/tests.rs b/jolt-eval/src/agent/tests.rs 
index ea9464b6f..3f3dc1b44 100644 --- a/jolt-eval/src/agent/tests.rs +++ b/jolt-eval/src/agent/tests.rs @@ -8,15 +8,14 @@ use crate::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamR use crate::invariant::{ CheckError, Invariant, InvariantTargets, InvariantViolation, SynthesisTarget, }; -use crate::objective::optimize::{ - auto_optimize, ObjectiveFunction, OptimizeConfig, OptimizeEnv, SingleObjective, -}; +use crate::objective::objective_fn::ObjectiveFunction; +use crate::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; +use crate::objective::{OptimizationObjective, HALSTEAD_BUGS, LLOC}; // ========================================================================= // Test invariants // ========================================================================= -/// Always passes -- the red-team loop should never find a violation. struct AlwaysPassInvariant; impl InvariantTargets for AlwaysPassInvariant { fn targets(&self) -> EnumSet { @@ -41,7 +40,6 @@ impl Invariant for AlwaysPassInvariant { } } -/// Always fails -- the red-team loop should find a violation immediately. struct AlwaysFailInvariant; impl InvariantTargets for AlwaysFailInvariant { fn targets(&self) -> EnumSet { @@ -68,7 +66,6 @@ impl Invariant for AlwaysFailInvariant { } } -/// Fails only when the input is 0 -- tests that fuzz inputs can trigger it. 
struct FailsOnZeroInvariant; impl InvariantTargets for FailsOnZeroInvariant { fn targets(&self) -> EnumSet { @@ -95,7 +92,7 @@ impl Invariant for FailsOnZeroInvariant { } } fn seed_corpus(&self) -> Vec { - vec![1, 2, 3] // seed corpus avoids 0 + vec![1, 2, 3] } } @@ -106,7 +103,9 @@ impl Invariant for FailsOnZeroInvariant { #[test] fn mock_always_ok_returns_text() { let agent = MockAgent::always_ok("hello world"); - let resp = agent.invoke(Path::new("/tmp"), "test prompt", &DiffScope::All).unwrap(); + let resp = agent + .invoke(Path::new("/tmp"), "test prompt", &DiffScope::All) + .unwrap(); assert_eq!(resp.text, "hello world"); assert!(resp.diff.is_none()); } @@ -114,16 +113,24 @@ fn mock_always_ok_returns_text() { #[test] fn mock_always_err_returns_error() { let agent = MockAgent::always_err("boom"); - let err = agent.invoke(Path::new("/tmp"), "test", &DiffScope::All).unwrap_err(); + let err = agent + .invoke(Path::new("/tmp"), "test", &DiffScope::All) + .unwrap_err(); assert_eq!(err.message, "boom"); } #[test] fn mock_records_prompts() { let agent = MockAgent::always_ok("ok"); - agent.invoke(Path::new("/tmp"), "prompt 1", &DiffScope::All).unwrap(); - agent.invoke(Path::new("/tmp"), "prompt 2", &DiffScope::All).unwrap(); - agent.invoke(Path::new("/tmp"), "prompt 3", &DiffScope::All).unwrap(); + agent + .invoke(Path::new("/tmp"), "prompt 1", &DiffScope::All) + .unwrap(); + agent + .invoke(Path::new("/tmp"), "prompt 2", &DiffScope::All) + .unwrap(); + agent + .invoke(Path::new("/tmp"), "prompt 3", &DiffScope::All) + .unwrap(); let prompts = agent.recorded_prompts(); assert_eq!(prompts.len(), 3); @@ -136,7 +143,9 @@ fn mock_records_prompts() { fn mock_always_ok_repeats_indefinitely() { let agent = MockAgent::always_ok("same"); for _ in 0..100 { - let resp = agent.invoke(Path::new("/tmp"), "x", &DiffScope::All).unwrap(); + let resp = agent + .invoke(Path::new("/tmp"), "x", &DiffScope::All) + .unwrap(); assert_eq!(resp.text, "same"); } } @@ -145,7 +154,9 @@ fn 
mock_always_ok_repeats_indefinitely() { fn mock_always_err_repeats_indefinitely() { let agent = MockAgent::always_err("fail"); for _ in 0..100 { - let err = agent.invoke(Path::new("/tmp"), "x", &DiffScope::All).unwrap_err(); + let err = agent + .invoke(Path::new("/tmp"), "x", &DiffScope::All) + .unwrap_err(); assert_eq!(err.message, "fail"); } } @@ -164,15 +175,21 @@ fn mock_from_responses_returns_in_order() { Err(AgentError::new("third fails")), ]); - let r1 = agent.invoke(Path::new("/tmp"), "a", &DiffScope::All).unwrap(); + let r1 = agent + .invoke(Path::new("/tmp"), "a", &DiffScope::All) + .unwrap(); assert_eq!(r1.text, "first"); assert!(r1.diff.is_none()); - let r2 = agent.invoke(Path::new("/tmp"), "b", &DiffScope::All).unwrap(); + let r2 = agent + .invoke(Path::new("/tmp"), "b", &DiffScope::All) + .unwrap(); assert_eq!(r2.text, "second"); assert_eq!(r2.diff.as_deref(), Some("diff")); - let r3 = agent.invoke(Path::new("/tmp"), "c", &DiffScope::All).unwrap_err(); + let r3 = agent + .invoke(Path::new("/tmp"), "c", &DiffScope::All) + .unwrap_err(); assert_eq!(r3.message, "third fails"); } @@ -189,11 +206,16 @@ fn mock_from_responses_last_entry_repeats() { }), ]); - agent.invoke(Path::new("/tmp"), "a", &DiffScope::All).unwrap(); - let r2 = agent.invoke(Path::new("/tmp"), "b", &DiffScope::All).unwrap(); + agent + .invoke(Path::new("/tmp"), "a", &DiffScope::All) + .unwrap(); + let r2 = agent + .invoke(Path::new("/tmp"), "b", &DiffScope::All) + .unwrap(); assert_eq!(r2.text, "last"); - // After exhausting queue, last response repeats - let r3 = agent.invoke(Path::new("/tmp"), "c", &DiffScope::All).unwrap(); + let r3 = agent + .invoke(Path::new("/tmp"), "c", &DiffScope::All) + .unwrap(); assert_eq!(r3.text, "last"); } @@ -204,7 +226,9 @@ fn mock_with_diff() { diff: Some("--- a/foo\n+++ b/foo\n@@ ...\n-old\n+new".into()), })]); - let resp = agent.invoke(Path::new("/tmp"), "optimize", &DiffScope::All).unwrap(); + let resp = agent + .invoke(Path::new("/tmp"), "optimize", 
&DiffScope::All) + .unwrap(); assert!(resp.diff.is_some()); assert!(resp.diff.unwrap().contains("+new")); } @@ -213,7 +237,6 @@ fn mock_with_diff() { // auto_redteam tests with MockAgent // ========================================================================= -/// Helper: build a structured envelope response string. fn envelope(analysis: &str, counterexample: impl serde::Serialize) -> String { serde_json::json!({ "analysis": analysis, @@ -226,7 +249,10 @@ fn envelope(analysis: &str, counterexample: impl serde::Serialize) -> String { fn redteam_no_violation_when_invariant_always_passes() { let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok(&envelope("I analyzed the code.", 42)); - let config = RedTeamConfig { num_iterations: 3, ..Default::default() }; + let config = RedTeamConfig { + num_iterations: 3, + ..Default::default() + }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -247,10 +273,12 @@ fn redteam_no_violation_when_invariant_always_passes() { #[test] fn redteam_finds_violation_with_structured_response() { - // AlwaysFailInvariant rejects every input. let invariant = AlwaysFailInvariant; let agent = MockAgent::always_ok(&envelope("I found a bug!", 99)); - let config = RedTeamConfig { num_iterations: 10, ..Default::default() }; + let config = RedTeamConfig { + num_iterations: 10, + ..Default::default() + }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -271,10 +299,12 @@ fn redteam_finds_violation_with_structured_response() { #[test] fn redteam_finds_violation_with_targeted_input() { - // FailsOnZeroInvariant only fails for input 0. 
let invariant = FailsOnZeroInvariant; let agent = MockAgent::always_ok(&envelope("Try zero", 0)); - let config = RedTeamConfig { num_iterations: 5, ..Default::default() }; + let config = RedTeamConfig { + num_iterations: 5, + ..Default::default() + }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -295,7 +325,10 @@ fn redteam_finds_violation_with_targeted_input() { fn redteam_no_violation_when_agent_misses() { let invariant = FailsOnZeroInvariant; let agent = MockAgent::always_ok(&envelope("Trying 1", 1)); - let config = RedTeamConfig { num_iterations: 2, ..Default::default() }; + let config = RedTeamConfig { + num_iterations: 2, + ..Default::default() + }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -314,7 +347,10 @@ fn redteam_no_violation_when_agent_misses() { fn redteam_handles_agent_errors_gracefully() { let invariant = AlwaysPassInvariant; let agent = MockAgent::always_err("network timeout"); - let config = RedTeamConfig { num_iterations: 3, ..Default::default() }; + let config = RedTeamConfig { + num_iterations: 3, + ..Default::default() + }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -334,10 +370,12 @@ fn redteam_handles_agent_errors_gracefully() { #[test] fn redteam_handles_no_json_in_response() { - // Agent returns plain text (no envelope, no code block) let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok("I looked around but have no candidate to offer."); - let config = RedTeamConfig { num_iterations: 1, ..Default::default() }; + let config = RedTeamConfig { + num_iterations: 1, + ..Default::default() + }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -354,10 +392,12 @@ fn redteam_handles_no_json_in_response() { #[test] fn redteam_handles_invalid_counterexample_type() { - // Structured envelope with wrong counterexample type for Input=u8 let invariant = AlwaysPassInvariant; let agent = 
MockAgent::always_ok(&envelope("Here", "not_a_number")); - let config = RedTeamConfig { num_iterations: 1, ..Default::default() }; + let config = RedTeamConfig { + num_iterations: 1, + ..Default::default() + }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -374,10 +414,12 @@ fn redteam_handles_invalid_counterexample_type() { #[test] fn redteam_fallback_extracts_json_from_freeform_text() { - // Agent doesn't return structured envelope, but has a code block let invariant = AlwaysFailInvariant; let agent = MockAgent::always_ok("Found it!\n```json\n77\n```"); - let config = RedTeamConfig { num_iterations: 1, ..Default::default() }; + let config = RedTeamConfig { + num_iterations: 1, + ..Default::default() + }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -393,7 +435,10 @@ fn redteam_fallback_extracts_json_from_freeform_text() { fn redteam_prompt_includes_invariant_description() { let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok(&envelope("ok", 0)); - let config = RedTeamConfig { num_iterations: 1, ..Default::default() }; + let config = RedTeamConfig { + num_iterations: 1, + ..Default::default() + }; auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -407,7 +452,10 @@ fn redteam_prompt_includes_invariant_description() { fn redteam_prompt_includes_input_example() { let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok(&envelope("ok", 0)); - let config = RedTeamConfig { num_iterations: 1, ..Default::default() }; + let config = RedTeamConfig { + num_iterations: 1, + ..Default::default() + }; auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -420,7 +468,10 @@ fn redteam_prompt_includes_input_example() { fn redteam_prompt_includes_failed_attempts_after_first_iteration() { let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok(&envelope("Tried something", 42)); - let config = RedTeamConfig { num_iterations: 3, ..Default::default() 
}; + let config = RedTeamConfig { + num_iterations: 3, + ..Default::default() + }; auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -437,7 +488,10 @@ fn redteam_prompt_includes_failed_attempts_after_first_iteration() { fn redteam_zero_iterations_returns_immediately() { let invariant = AlwaysPassInvariant; let agent = MockAgent::always_ok("should not be called"); - let config = RedTeamConfig { num_iterations: 0, ..Default::default() }; + let config = RedTeamConfig { + num_iterations: 0, + ..Default::default() + }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -465,7 +519,10 @@ fn redteam_mixed_agent_responses() { diff: None, }), ]); - let config = RedTeamConfig { num_iterations: 3, ..Default::default() }; + let config = RedTeamConfig { + num_iterations: 3, + ..Default::default() + }; let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); @@ -486,9 +543,10 @@ fn redteam_mixed_agent_responses() { #[test] fn agent_harness_is_object_safe() { - // Verify we can use AgentHarness as a trait object let agent: Box = Box::new(MockAgent::always_ok("hi")); - let resp = agent.invoke(Path::new("/tmp"), "hello", &DiffScope::All).unwrap(); + let resp = agent + .invoke(Path::new("/tmp"), "hello", &DiffScope::All) + .unwrap(); assert_eq!(resp.text, "hi"); } @@ -496,12 +554,12 @@ fn agent_harness_is_object_safe() { fn agent_harness_works_with_arc() { use std::sync::Arc; let agent: Arc = Arc::new(MockAgent::always_ok("shared")); - let resp = agent.invoke(Path::new("/tmp"), "test", &DiffScope::All).unwrap(); + let resp = agent + .invoke(Path::new("/tmp"), "test", &DiffScope::All) + .unwrap(); assert_eq!(resp.text, "shared"); } -/// A custom multi-agent harness that fans out to N agents and returns the -/// first successful response. Demonstrates the trait's extensibility. 
struct FirstSuccessHarness { agents: Vec>, } @@ -532,7 +590,9 @@ fn custom_multi_agent_harness() { ], }; - let resp = harness.invoke(Path::new("/tmp"), "test", &DiffScope::All).unwrap(); + let resp = harness + .invoke(Path::new("/tmp"), "test", &DiffScope::All) + .unwrap(); assert_eq!(resp.text, "agent 3 succeeded"); } @@ -545,7 +605,9 @@ fn custom_multi_agent_all_fail() { ], }; - let err = harness.invoke(Path::new("/tmp"), "test", &DiffScope::All).unwrap_err(); + let err = harness + .invoke(Path::new("/tmp"), "test", &DiffScope::All) + .unwrap_err(); assert_eq!(err.message, "All agents failed"); } @@ -559,14 +621,16 @@ fn custom_harness_plugs_into_auto_redteam() { }; let invariant = AlwaysPassInvariant; - let config = RedTeamConfig { num_iterations: 2, ..Default::default() }; + let config = RedTeamConfig { + num_iterations: 2, + ..Default::default() + }; let result = auto_redteam(&invariant, &config, &harness, Path::new("/tmp")); match result { RedTeamResult::NoViolation { attempts } => { assert_eq!(attempts.len(), 2); - // The harness should have used agent 2's response assert!(attempts[0].approach.contains("agent 2 found nothing")); } _ => panic!("Expected NoViolation"), @@ -577,8 +641,16 @@ fn custom_harness_plugs_into_auto_redteam() { // Mock OptimizeEnv // ========================================================================= +fn lloc() -> OptimizationObjective { + LLOC +} + +fn halstead() -> OptimizationObjective { + HALSTEAD_BUGS +} + struct MockOptimizeEnv { - measurements: Vec>, + measurements: Vec>, measure_index: usize, invariants_pass: Vec, invariant_index: usize, @@ -600,7 +672,7 @@ impl MockOptimizeEnv { } } - fn with_measurements(mut self, measurements: Vec>) -> Self { + fn with_measurements(mut self, measurements: Vec>) -> Self { self.measurements = measurements; self } @@ -612,7 +684,7 @@ impl MockOptimizeEnv { } impl OptimizeEnv for MockOptimizeEnv { - fn measure(&mut self) -> HashMap { + fn measure(&mut self) -> HashMap { if 
self.measurements.is_empty() { return HashMap::new(); } @@ -643,12 +715,24 @@ impl OptimizeEnv for MockOptimizeEnv { } } -fn m(pairs: &[(&str, f64)]) -> HashMap { - pairs.iter().map(|(k, v)| (k.to_string(), *v)).collect() +fn m(pairs: &[(OptimizationObjective, f64)]) -> HashMap { + pairs.iter().cloned().collect() } -fn time_obj() -> SingleObjective { - SingleObjective { name: "time".into() } +fn lloc_obj() -> ObjectiveFunction { + const INPUTS: &[OptimizationObjective] = &[LLOC]; + ObjectiveFunction { + name: "test_lloc", + inputs: INPUTS, + evaluate: |m| m.get(&LLOC).copied().unwrap_or(f64::INFINITY), + } +} + +fn opt_config(iterations: usize) -> OptimizeConfig { + OptimizeConfig { + num_iterations: iterations, + hint: None, + } } // ========================================================================= @@ -662,13 +746,11 @@ fn optimize_accepts_improvement() { diff: Some("fake diff".into()), })]); - let mut env = MockOptimizeEnv::new().with_measurements(vec![ - m(&[("time", 10.0)]), - m(&[("time", 8.0)]), - ]); + let mut env = + MockOptimizeEnv::new().with_measurements(vec![m(&[(lloc(), 10.0)]), m(&[(lloc(), 8.0)])]); - let config = OptimizeConfig { num_iterations: 1, hint: None }; - let obj = time_obj(); + let config = opt_config(1); + let obj = lloc_obj(); let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert_eq!(result.attempts.len(), 1); @@ -685,13 +767,11 @@ fn optimize_rejects_regression() { diff: Some("bad diff".into()), })]); - let mut env = MockOptimizeEnv::new().with_measurements(vec![ - m(&[("time", 10.0)]), - m(&[("time", 12.0)]), - ]); + let mut env = + MockOptimizeEnv::new().with_measurements(vec![m(&[(lloc(), 10.0)]), m(&[(lloc(), 12.0)])]); - let config = OptimizeConfig { num_iterations: 1, hint: None }; - let obj = time_obj(); + let config = opt_config(1); + let obj = lloc_obj(); let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert_eq!(result.attempts.len(), 1); @@ -708,14 
+788,11 @@ fn optimize_rejects_when_invariants_fail() { })]); let mut env = MockOptimizeEnv::new() - .with_measurements(vec![ - m(&[("time", 10.0)]), - m(&[("time", 5.0)]), - ]) + .with_measurements(vec![m(&[(lloc(), 10.0)]), m(&[(lloc(), 5.0)])]) .with_invariants(vec![false]); - let config = OptimizeConfig { num_iterations: 1, hint: None }; - let obj = time_obj(); + let config = opt_config(1); + let obj = lloc_obj(); let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert!(!result.attempts[0].invariants_passed); @@ -726,14 +803,12 @@ fn optimize_rejects_when_invariants_fail() { #[test] fn optimize_custom_objective_function() { - // Weighted sum: 2*time + size. Agent improves time but regresses size. - struct WeightedSum; - impl ObjectiveFunction for WeightedSum { - fn description(&self) -> String { "Minimize 2*time + size".into() } - fn evaluate(&self, m: &HashMap) -> f64 { - 2.0 * m.get("time").unwrap_or(&0.0) + m.get("size").unwrap_or(&0.0) - } - } + const INPUTS: &[OptimizationObjective] = &[LLOC, HALSTEAD_BUGS]; + let weighted = ObjectiveFunction { + name: "weighted", + inputs: INPUTS, + evaluate: |m| 2.0 * m.get(&LLOC).unwrap_or(&0.0) + m.get(&HALSTEAD_BUGS).unwrap_or(&0.0), + }; let agent = MockAgent::from_responses(vec![Ok(AgentResponse { text: "optimized".into(), @@ -741,15 +816,13 @@ fn optimize_custom_objective_function() { })]); let mut env = MockOptimizeEnv::new().with_measurements(vec![ - m(&[("time", 10.0), ("size", 100.0)]), // score = 120 - m(&[("time", 8.0), ("size", 110.0)]), // score = 126 (regression!) + m(&[(lloc(), 10.0), (halstead(), 100.0)]), // score = 120 + m(&[(lloc(), 8.0), (halstead(), 110.0)]), // score = 126 (regression!) 
]); - let config = OptimizeConfig { num_iterations: 1, hint: None }; - let obj = WeightedSum; - let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); + let config = opt_config(1); + let result = auto_optimize(&agent, &mut env, &weighted, &config, Path::new("/tmp")); - // Rejected because 126 > 120 assert_eq!(result.best_score, 120.0); assert!(env.accepted.is_empty()); assert_eq!(env.rejected, 1); @@ -758,20 +831,29 @@ fn optimize_custom_objective_function() { #[test] fn optimize_multi_iteration_progressive_improvement() { let agent = MockAgent::from_responses(vec![ - Ok(AgentResponse { text: "iter 1".into(), diff: Some("diff1".into()) }), - Ok(AgentResponse { text: "iter 2".into(), diff: Some("diff2".into()) }), - Ok(AgentResponse { text: "iter 3".into(), diff: Some("diff3".into()) }), + Ok(AgentResponse { + text: "iter 1".into(), + diff: Some("diff1".into()), + }), + Ok(AgentResponse { + text: "iter 2".into(), + diff: Some("diff2".into()), + }), + Ok(AgentResponse { + text: "iter 3".into(), + diff: Some("diff3".into()), + }), ]); let mut env = MockOptimizeEnv::new().with_measurements(vec![ - m(&[("time", 10.0)]), - m(&[("time", 8.0)]), - m(&[("time", 9.0)]), - m(&[("time", 6.0)]), + m(&[(lloc(), 10.0)]), + m(&[(lloc(), 8.0)]), + m(&[(lloc(), 9.0)]), + m(&[(lloc(), 6.0)]), ]); - let config = OptimizeConfig { num_iterations: 3, hint: None }; - let obj = time_obj(); + let config = opt_config(3); + let obj = lloc_obj(); let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert_eq!(result.attempts.len(), 3); @@ -783,15 +865,21 @@ fn optimize_multi_iteration_progressive_improvement() { #[test] fn optimize_stops_when_agent_produces_no_diff() { let agent = MockAgent::from_responses(vec![ - Ok(AgentResponse { text: "changed".into(), diff: Some("diff1".into()) }), - Ok(AgentResponse { text: "nothing else".into(), diff: None }), + Ok(AgentResponse { + text: "changed".into(), + diff: Some("diff1".into()), + }), + 
Ok(AgentResponse { + text: "nothing else".into(), + diff: None, + }), ]); - let mut env = MockOptimizeEnv::new() - .with_measurements(vec![m(&[("time", 10.0)]), m(&[("time", 9.0)])]); + let mut env = + MockOptimizeEnv::new().with_measurements(vec![m(&[(lloc(), 10.0)]), m(&[(lloc(), 9.0)])]); - let config = OptimizeConfig { num_iterations: 5, hint: None }; - let obj = time_obj(); + let config = opt_config(5); + let obj = lloc_obj(); let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert_eq!(result.attempts.len(), 1); @@ -800,15 +888,18 @@ fn optimize_stops_when_agent_produces_no_diff() { #[test] fn optimize_stops_when_agent_errors() { let agent = MockAgent::from_responses(vec![ - Ok(AgentResponse { text: "change".into(), diff: Some("diff".into()) }), + Ok(AgentResponse { + text: "change".into(), + diff: Some("diff".into()), + }), Err(AgentError::new("agent crashed")), ]); - let mut env = MockOptimizeEnv::new() - .with_measurements(vec![m(&[("time", 10.0)]), m(&[("time", 10.0)])]); + let mut env = + MockOptimizeEnv::new().with_measurements(vec![m(&[(lloc(), 10.0)]), m(&[(lloc(), 10.0)])]); - let config = OptimizeConfig { num_iterations: 5, hint: None }; - let obj = time_obj(); + let config = opt_config(5); + let obj = lloc_obj(); let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert_eq!(result.attempts.len(), 1); @@ -817,11 +908,10 @@ fn optimize_stops_when_agent_errors() { #[test] fn optimize_zero_iterations() { let agent = MockAgent::always_ok("should not be called"); - let mut env = MockOptimizeEnv::new() - .with_measurements(vec![m(&[("time", 10.0)])]); + let mut env = MockOptimizeEnv::new().with_measurements(vec![m(&[(lloc(), 10.0)])]); - let config = OptimizeConfig { num_iterations: 0, hint: None }; - let obj = time_obj(); + let config = opt_config(0); + let obj = lloc_obj(); let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert!(result.attempts.is_empty()); @@ 
-837,14 +927,14 @@ fn optimize_prompt_includes_measurements_and_hint() { diff: Some("diff".into()), })]); - let mut env = MockOptimizeEnv::new() - .with_measurements(vec![m(&[("time", 42.0)]), m(&[("time", 42.0)])]); + let mut env = + MockOptimizeEnv::new().with_measurements(vec![m(&[(lloc(), 42.0)]), m(&[(lloc(), 42.0)])]); let config = OptimizeConfig { num_iterations: 1, hint: Some("Focus on the inner loop".into()), }; - let obj = time_obj(); + let obj = lloc_obj(); auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); let prompts = agent.recorded_prompts(); @@ -856,18 +946,24 @@ fn optimize_prompt_includes_measurements_and_hint() { #[test] fn optimize_prompt_includes_past_attempts() { let agent = MockAgent::from_responses(vec![ - Ok(AgentResponse { text: "attempt 1".into(), diff: Some("d1".into()) }), - Ok(AgentResponse { text: "attempt 2".into(), diff: Some("d2".into()) }), + Ok(AgentResponse { + text: "attempt 1".into(), + diff: Some("d1".into()), + }), + Ok(AgentResponse { + text: "attempt 2".into(), + diff: Some("d2".into()), + }), ]); let mut env = MockOptimizeEnv::new().with_measurements(vec![ - m(&[("time", 10.0)]), - m(&[("time", 10.0)]), - m(&[("time", 10.0)]), + m(&[(lloc(), 10.0)]), + m(&[(lloc(), 10.0)]), + m(&[(lloc(), 10.0)]), ]); - let config = OptimizeConfig { num_iterations: 2, hint: None }; - let obj = time_obj(); + let config = opt_config(2); + let obj = lloc_obj(); auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); let prompts = agent.recorded_prompts(); @@ -884,11 +980,11 @@ fn optimize_diff_is_applied() { diff: Some("--- a/x\n+++ b/x\n".into()), })]); - let mut env = MockOptimizeEnv::new() - .with_measurements(vec![m(&[("time", 10.0)]), m(&[("time", 10.0)])]); + let mut env = + MockOptimizeEnv::new().with_measurements(vec![m(&[(lloc(), 10.0)]), m(&[(lloc(), 10.0)])]); - let config = OptimizeConfig { num_iterations: 1, hint: None }; - let obj = time_obj(); + let config = opt_config(1); + let obj = lloc_obj(); 
auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert_eq!(env.applied_diffs.len(), 1); @@ -898,22 +994,31 @@ fn optimize_diff_is_applied() { #[test] fn optimize_invariant_failure_mid_sequence() { let agent = MockAgent::from_responses(vec![ - Ok(AgentResponse { text: "i1".into(), diff: Some("d1".into()) }), - Ok(AgentResponse { text: "i2".into(), diff: Some("d2".into()) }), - Ok(AgentResponse { text: "i3".into(), diff: Some("d3".into()) }), + Ok(AgentResponse { + text: "i1".into(), + diff: Some("d1".into()), + }), + Ok(AgentResponse { + text: "i2".into(), + diff: Some("d2".into()), + }), + Ok(AgentResponse { + text: "i3".into(), + diff: Some("d3".into()), + }), ]); let mut env = MockOptimizeEnv::new() .with_measurements(vec![ - m(&[("time", 10.0)]), - m(&[("time", 8.0)]), - m(&[("time", 5.0)]), - m(&[("time", 7.0)]), + m(&[(lloc(), 10.0)]), + m(&[(lloc(), 8.0)]), + m(&[(lloc(), 5.0)]), + m(&[(lloc(), 7.0)]), ]) .with_invariants(vec![true, false, true]); - let config = OptimizeConfig { num_iterations: 3, hint: None }; - let obj = time_obj(); + let config = opt_config(3); + let obj = lloc_obj(); let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); assert_eq!(result.attempts.len(), 3); @@ -925,3 +1030,13 @@ fn optimize_invariant_failure_mid_sequence() { assert_eq!(result.best_score, 7.0); } +#[test] +fn objective_function_struct_evaluates() { + let obj = lloc_obj(); + let mut m = HashMap::new(); + m.insert(lloc(), 3.5); + assert_eq!((obj.evaluate)(&m), 3.5); + + let empty = HashMap::new(); + assert_eq!((obj.evaluate)(&empty), f64::INFINITY); +} diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index 1006d26e1..bee3888e0 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -18,7 +18,11 @@ pub use guests::{GuestConfig, GuestProgram, JoltDevice, ProofVerifyError}; pub use invariant::{ CheckError, Invariant, InvariantTargets, InvariantViolation, JoltInvariants, SynthesisTarget, }; -pub use 
objective::{AbstractObjective, MeasurementError, Objective, PerfObjective}; +pub use objective::objective_fn::ObjectiveFunction; +pub use objective::{ + MeasurementError, Objective, OptimizationObjective, PerformanceObjective, + StaticAnalysisObjective, +}; // Re-exports used by the #[invariant] proc macro generated code. pub use arbitrary; diff --git a/jolt-eval/src/objective/code_quality/cognitive.rs b/jolt-eval/src/objective/code_quality/cognitive.rs index 5c72aeafb..4207ec38a 100644 --- a/jolt-eval/src/objective/code_quality/cognitive.rs +++ b/jolt-eval/src/objective/code_quality/cognitive.rs @@ -1,35 +1,42 @@ -use std::path::{Path, PathBuf}; +use std::path::Path; use rust_code_analysis::FuncSpace; use super::lloc::{analyze_rust_file, rust_files}; -use crate::objective::{AbstractObjective, MeasurementError}; +use crate::objective::{ + MeasurementError, Objective, OptimizationObjective, StaticAnalysisObjective, +}; + +pub const COGNITIVE_COMPLEXITY: OptimizationObjective = OptimizationObjective::StaticAnalysis( + StaticAnalysisObjective::CognitiveComplexity(CognitiveComplexityObjective { root: "" }), +); /// Average cognitive complexity per function across all Rust files under /// `jolt-core/src/`. -/// -/// Cognitive complexity measures how difficult code is to understand, -/// penalizing deeply nested control flow, recursion, and breaks in -/// linear flow. Lower is better. 
+#[derive(Clone, Copy)] pub struct CognitiveComplexityObjective { - root: PathBuf, + pub(crate) root: &'static str, } impl CognitiveComplexityObjective { pub fn new(root: &Path) -> Self { Self { - root: root.to_path_buf(), + root: Box::leak(root.to_string_lossy().into_owned().into_boxed_str()), } } } -impl AbstractObjective for CognitiveComplexityObjective { +impl Objective for CognitiveComplexityObjective { + type Setup = (); + fn name(&self) -> &str { "cognitive_complexity_avg" } + fn setup(&self) {} + fn collect_measurement(&self) -> Result { - let src_dir = self.root.join("jolt-core/src"); + let src_dir = std::path::PathBuf::from(self.root).join("jolt-core/src"); let mut total = 0.0; let mut count = 0usize; for path in rust_files(&src_dir)? { @@ -42,11 +49,8 @@ impl AbstractObjective for CognitiveComplexityObjective { } Ok(total / count as f64) } - } -/// Walk the function-space tree and collect cognitive complexity from -/// leaf functions (functions with no child spaces). fn collect_leaf_cognitive(space: &FuncSpace, total: &mut f64, count: &mut usize) { if space.spaces.is_empty() { let c = space.metrics.cognitive.cognitive(); @@ -78,13 +82,16 @@ mod tests { fn cognitive_on_single_file() { let source = b"fn simple() { let x = 1; }".to_vec(); let path = Path::new("test.rs"); - let space = - rust_code_analysis::get_function_spaces(&rust_code_analysis::LANG::Rust, source, path, None) - .unwrap(); + let space = rust_code_analysis::get_function_spaces( + &rust_code_analysis::LANG::Rust, + source, + path, + None, + ) + .unwrap(); let mut total = 0.0; let mut count = 0; collect_leaf_cognitive(&space, &mut total, &mut count); - // A straight-line function has 0 cognitive complexity assert_eq!(total, 0.0); } } diff --git a/jolt-eval/src/objective/code_quality/halstead_bugs.rs b/jolt-eval/src/objective/code_quality/halstead_bugs.rs index 4c24b5a0e..f6a29aafc 100644 --- a/jolt-eval/src/objective/code_quality/halstead_bugs.rs +++ 
b/jolt-eval/src/objective/code_quality/halstead_bugs.rs @@ -1,34 +1,43 @@ -use std::path::{Path, PathBuf}; +use std::path::Path; use rust_code_analysis::FuncSpace; use super::lloc::{analyze_rust_file, rust_files}; -use crate::objective::{AbstractObjective, MeasurementError}; +use crate::objective::{ + MeasurementError, Objective, OptimizationObjective, StaticAnalysisObjective, +}; + +pub const HALSTEAD_BUGS: OptimizationObjective = OptimizationObjective::StaticAnalysis( + StaticAnalysisObjective::HalsteadBugs(HalsteadBugsObjective { root: "" }), +); /// Estimated number of delivered bugs across all Rust files under /// `jolt-core/src/`, based on Halstead's bug prediction formula /// (B = V / 3000, where V is program volume). -/// -/// Lower is better. +#[derive(Clone, Copy)] pub struct HalsteadBugsObjective { - root: PathBuf, + pub(crate) root: &'static str, } impl HalsteadBugsObjective { pub fn new(root: &Path) -> Self { Self { - root: root.to_path_buf(), + root: Box::leak(root.to_string_lossy().into_owned().into_boxed_str()), } } } -impl AbstractObjective for HalsteadBugsObjective { +impl Objective for HalsteadBugsObjective { + type Setup = (); + fn name(&self) -> &str { "halstead_bugs" } + fn setup(&self) {} + fn collect_measurement(&self) -> Result { - let src_dir = self.root.join("jolt-core/src"); + let src_dir = std::path::PathBuf::from(self.root).join("jolt-core/src"); let mut total = 0.0; for path in rust_files(&src_dir)? { if let Some(space) = analyze_rust_file(&path) { @@ -37,11 +46,8 @@ impl AbstractObjective for HalsteadBugsObjective { } Ok(total) } - } -/// Sum Halstead bugs across all function spaces in the tree, -/// skipping NaN values (empty functions produce 0/0). 
fn sum_bugs(space: &FuncSpace) -> f64 { let b = space.metrics.halstead.bugs(); let mut total = if b.is_finite() { b } else { 0.0 }; @@ -67,11 +73,14 @@ mod tests { fn halstead_bugs_on_trivial_code() { let source = b"fn f() { let x = 1 + 2; }".to_vec(); let path = Path::new("test.rs"); - let space = - rust_code_analysis::get_function_spaces(&rust_code_analysis::LANG::Rust, source, path, None) - .unwrap(); + let space = rust_code_analysis::get_function_spaces( + &rust_code_analysis::LANG::Rust, + source, + path, + None, + ) + .unwrap(); let bugs = sum_bugs(&space); - // Trivial code should have very low estimated bugs assert!(bugs < 1.0, "trivial code bugs should be < 1, got {bugs}"); } } diff --git a/jolt-eval/src/objective/code_quality/lloc.rs b/jolt-eval/src/objective/code_quality/lloc.rs index 77515aa5b..680733679 100644 --- a/jolt-eval/src/objective/code_quality/lloc.rs +++ b/jolt-eval/src/objective/code_quality/lloc.rs @@ -2,29 +2,41 @@ use std::path::{Path, PathBuf}; use rust_code_analysis::{get_function_spaces, FuncSpace, LANG}; -use crate::objective::{AbstractObjective, MeasurementError}; +use crate::objective::{ + MeasurementError, Objective, OptimizationObjective, StaticAnalysisObjective, +}; + +pub const LLOC: OptimizationObjective = + OptimizationObjective::StaticAnalysis(StaticAnalysisObjective::Lloc(LlocObjective { + root: "", + })); /// Total logical lines of code (LLOC) across all Rust files under /// `jolt-core/src/`. 
+#[derive(Clone, Copy)] pub struct LlocObjective { - root: PathBuf, + pub(crate) root: &'static str, } impl LlocObjective { pub fn new(root: &Path) -> Self { Self { - root: root.to_path_buf(), + root: Box::leak(root.to_string_lossy().into_owned().into_boxed_str()), } } } -impl AbstractObjective for LlocObjective { +impl Objective for LlocObjective { + type Setup = (); + fn name(&self) -> &str { "lloc" } + fn setup(&self) {} + fn collect_measurement(&self) -> Result { - let src_dir = self.root.join("jolt-core/src"); + let src_dir = PathBuf::from(self.root).join("jolt-core/src"); let mut total = 0.0; for path in rust_files(&src_dir)? { if let Some(space) = analyze_rust_file(&path) { @@ -85,7 +97,10 @@ mod tests { let path = Path::new("test.rs"); let space = get_function_spaces(&LANG::Rust, source, path, None).unwrap(); let lloc = space.metrics.loc.lloc(); - assert!(lloc >= 2.0, "two statements should give lloc >= 2, got {lloc}"); + assert!( + lloc >= 2.0, + "two statements should give lloc >= 2, got {lloc}" + ); } #[test] diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 25e00e708..8d1e38f34 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -1,9 +1,11 @@ pub mod code_quality; +pub mod objective_fn; pub mod optimize; pub mod performance; pub mod synthesis; use std::fmt; +use std::hash::{Hash, Hasher}; use std::path::Path; /// Error during objective measurement. @@ -28,56 +30,57 @@ impl MeasurementError { } } -/// Core objective trait for measurable properties. -pub trait AbstractObjective: Send + Sync { - fn name(&self) -> &str; - fn collect_measurement(&self) -> Result; - fn units(&self) -> Option<&str> { - None - } -} - -/// A performance objective suitable for Criterion benchmarking. +/// Unified objective trait. /// -/// Separates setup (run once) from the hot path (run many times in -/// Criterion's `b.iter()` loop). 
Use the `bench_objective!` macro to -/// generate a Criterion benchmark harness from a `PerfObjective`. -pub trait PerfObjective: Default + Send + Sync { +/// Static-analysis objectives override [`collect_measurement`](Objective::collect_measurement) +/// and set `Setup = ()`. +/// +/// Performance objectives override [`setup`](Objective::setup) + +/// [`run`](Objective::run) and leave `collect_measurement` as the +/// default (returns an error). +pub trait Objective: Send + Sync { type Setup: Send; fn name(&self) -> &str; - /// Per-iteration setup (e.g. clone a polynomial). Called by - /// `iter_batched` before each measured iteration. - fn setup(&self) -> Self::Setup; + fn units(&self) -> Option<&str> { + None + } - /// The hot path to benchmark. Takes owned setup so the clone cost - /// is excluded from measurement via `iter_batched`. - fn run(&self, setup: Self::Setup); + /// Per-iteration setup for Criterion benchmarks. + fn setup(&self) -> Self::Setup; - fn units(&self) -> &str { - "s" + /// Override for static-analysis objectives that produce a direct measurement. + fn collect_measurement(&self) -> Result { + Err(MeasurementError::new("not directly measurable")) } + + /// Override for performance objectives benchmarked by Criterion. + fn run(&self, _setup: Self::Setup) {} } -/// Centralized enum for static-analysis objectives. -/// -/// Performance objectives are handled separately via Criterion benchmarks -/// (see `PerfObjective` and `bench_objective!`). -pub enum Objective { +// ========================================================================= +// Data-containing enums — Hash/Eq based on discriminant only +// ========================================================================= + +/// Static-analysis objectives. 
+#[derive(Clone, Copy)] +pub enum StaticAnalysisObjective { Lloc(code_quality::lloc::LlocObjective), CognitiveComplexity(code_quality::cognitive::CognitiveComplexityObjective), HalsteadBugs(code_quality::halstead_bugs::HalsteadBugsObjective), } -impl Objective { +impl StaticAnalysisObjective { pub fn all(root: &Path) -> Vec { vec![ Self::Lloc(code_quality::lloc::LlocObjective::new(root)), Self::CognitiveComplexity(code_quality::cognitive::CognitiveComplexityObjective::new( root, )), - Self::HalsteadBugs(code_quality::halstead_bugs::HalsteadBugsObjective::new(root)), + Self::HalsteadBugs(code_quality::halstead_bugs::HalsteadBugsObjective::new( + root, + )), ] } @@ -106,14 +109,123 @@ impl Objective { } } -/// Names of all registered `PerfObjective` benchmarks. -pub fn perf_objective_names() -> &'static [&'static str] { - &[ - performance::binding::BindLowToHighObjective::NAME, - performance::binding::BindHighToLowObjective::NAME, - ] +impl PartialEq for StaticAnalysisObjective { + fn eq(&self, other: &Self) -> bool { + std::mem::discriminant(self) == std::mem::discriminant(other) + } +} +impl Eq for StaticAnalysisObjective {} +impl Hash for StaticAnalysisObjective { + fn hash(&self, state: &mut H) { + std::mem::discriminant(self).hash(state); + } +} + +/// Criterion-benchmarked performance objectives. 
+#[derive(Clone, Copy)] +pub enum PerformanceObjective { + BindLowToHigh(performance::binding::BindLowToHighObjective), + BindHighToLow(performance::binding::BindHighToLowObjective), } +impl PerformanceObjective { + pub fn all() -> Vec { + vec![ + Self::BindLowToHigh(performance::binding::BindLowToHighObjective), + Self::BindHighToLow(performance::binding::BindHighToLowObjective), + ] + } + + pub fn name(&self) -> &str { + match self { + Self::BindLowToHigh(o) => o.name(), + Self::BindHighToLow(o) => o.name(), + } + } + + pub fn units(&self) -> Option<&str> { + match self { + Self::BindLowToHigh(o) => o.units(), + Self::BindHighToLow(o) => o.units(), + } + } +} + +impl PartialEq for PerformanceObjective { + fn eq(&self, other: &Self) -> bool { + std::mem::discriminant(self) == std::mem::discriminant(other) + } +} +impl Eq for PerformanceObjective {} +impl Hash for PerformanceObjective { + fn hash(&self, state: &mut H) { + std::mem::discriminant(self).hash(state); + } +} + +/// Union of all known objectives — used as a type-safe HashMap key. +#[derive(Clone, Copy)] +pub enum OptimizationObjective { + StaticAnalysis(StaticAnalysisObjective), + Performance(PerformanceObjective), +} + +// Re-export the const objective keys from their defining modules. 
+pub use code_quality::cognitive::COGNITIVE_COMPLEXITY; +pub use code_quality::halstead_bugs::HALSTEAD_BUGS; +pub use code_quality::lloc::LLOC; +pub use performance::binding::{BIND_HIGH_TO_LOW, BIND_LOW_TO_HIGH}; + +impl OptimizationObjective { + pub fn all(root: &Path) -> Vec { + let mut all = Vec::new(); + for s in StaticAnalysisObjective::all(root) { + all.push(Self::StaticAnalysis(s)); + } + for p in PerformanceObjective::all() { + all.push(Self::Performance(p)); + } + all + } + + pub fn name(&self) -> &str { + match self { + Self::StaticAnalysis(s) => s.name(), + Self::Performance(p) => p.name(), + } + } + + pub fn units(&self) -> Option<&str> { + match self { + Self::StaticAnalysis(s) => s.units(), + Self::Performance(p) => p.units(), + } + } + + pub fn is_perf(&self) -> bool { + matches!(self, Self::Performance(_)) + } +} + +impl PartialEq for OptimizationObjective { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::StaticAnalysis(a), Self::StaticAnalysis(b)) => a == b, + (Self::Performance(a), Self::Performance(b)) => a == b, + _ => false, + } + } +} +impl Eq for OptimizationObjective {} +impl Hash for OptimizationObjective { + fn hash(&self, state: &mut H) { + std::mem::discriminant(self).hash(state); + match self { + Self::StaticAnalysis(s) => s.hash(state), + Self::Performance(p) => p.hash(state), + } + } +} #[cfg(test)] mod tests { @@ -124,9 +236,15 @@ mod tests { value: f64, } - impl AbstractObjective for ConstantObjective { - fn name(&self) -> &str { self.label } - fn collect_measurement(&self) -> Result { Ok(self.value) } + impl Objective for ConstantObjective { + type Setup = (); + fn name(&self) -> &str { + self.label + } + fn setup(&self) {} + fn collect_measurement(&self) -> Result { + Ok(self.value) + } } #[test] @@ -140,15 +258,48 @@ mod tests { } #[test] - fn objective_all() { + fn static_analysis_all_measures() { let root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) .parent() .unwrap(); - let objectives = 
Objective::all(root); - assert_eq!(objectives.len(), 3); - for obj in &objectives { - let val = obj.collect_measurement().unwrap(); - assert!(val > 0.0, "{} should be > 0, got {val}", obj.name()); + for sa in StaticAnalysisObjective::all(root) { + let val = sa.collect_measurement().unwrap(); + assert!(val > 0.0, "{} should be > 0, got {val}", sa.name()); } } + + #[test] + fn optimization_objective_hashmap_key() { + use std::collections::HashMap; + let root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap(); + let lloc = OptimizationObjective::StaticAnalysis(StaticAnalysisObjective::Lloc( + code_quality::lloc::LlocObjective::new(root), + )); + let bind = OptimizationObjective::Performance(PerformanceObjective::BindLowToHigh( + performance::binding::BindLowToHighObjective, + )); + let mut m = HashMap::new(); + m.insert(lloc, 100.0); + m.insert(bind, 0.5); + + // Look up with a freshly constructed key — works because Hash/Eq + // is discriminant-based. + let lloc2 = OptimizationObjective::StaticAnalysis(StaticAnalysisObjective::Lloc( + code_quality::lloc::LlocObjective::new(Path::new("/other")), + )); + assert_eq!(m[&lloc2], 100.0); + } + + #[test] + fn optimization_objective_all() { + let root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap(); + let all = OptimizationObjective::all(root); + assert_eq!(all.len(), 5); // 3 static + 2 perf + assert!(all.iter().any(|o| o.is_perf())); + assert!(all.iter().any(|o| !o.is_perf())); + } } diff --git a/jolt-eval/src/objective/objective_fn/mod.rs b/jolt-eval/src/objective/objective_fn/mod.rs new file mode 100644 index 000000000..b2fb86e6a --- /dev/null +++ b/jolt-eval/src/objective/objective_fn/mod.rs @@ -0,0 +1,124 @@ +use std::collections::HashMap; + +use super::{ + OptimizationObjective, BIND_HIGH_TO_LOW, BIND_LOW_TO_HIGH, COGNITIVE_COMPLEXITY, HALSTEAD_BUGS, + LLOC, +}; + +/// A concrete objective function that the optimizer minimizes. 
+/// +/// Contains the list of measurements it depends on and a pure +/// function that combines them into a scalar. +#[derive(Clone, Copy)] +pub struct ObjectiveFunction { + /// CLI-visible name (e.g. `"minimize_lloc"`). + pub name: &'static str, + /// The [`OptimizationObjective`]s this function reads. + pub inputs: &'static [OptimizationObjective], + /// Combine measurements into a scalar to minimize. + /// The HashMap is guaranteed to contain all keys from [`inputs`]. + pub evaluate: fn(&HashMap) -> f64, +} + +impl ObjectiveFunction { + /// All registered objective functions. + pub fn all() -> &'static [ObjectiveFunction] { + &[ + MINIMIZE_LLOC, + MINIMIZE_COGNITIVE_COMPLEXITY, + MINIMIZE_HALSTEAD_BUGS, + MINIMIZE_BIND_LOW_TO_HIGH, + MINIMIZE_BIND_HIGH_TO_LOW, + ] + } + + /// Look up an objective function by CLI name. + pub fn by_name(name: &str) -> Option<&'static ObjectiveFunction> { + Self::all().iter().find(|f| f.name == name) + } +} + +pub const MINIMIZE_LLOC: ObjectiveFunction = ObjectiveFunction { + name: "minimize_lloc", + inputs: &[LLOC], + evaluate: |m| m.get(&LLOC).copied().unwrap_or(f64::INFINITY), +}; + +pub const MINIMIZE_COGNITIVE_COMPLEXITY: ObjectiveFunction = ObjectiveFunction { + name: "minimize_cognitive_complexity", + inputs: &[COGNITIVE_COMPLEXITY], + evaluate: |m| { + m.get(&COGNITIVE_COMPLEXITY) + .copied() + .unwrap_or(f64::INFINITY) + }, +}; + +pub const MINIMIZE_HALSTEAD_BUGS: ObjectiveFunction = ObjectiveFunction { + name: "minimize_halstead_bugs", + inputs: &[HALSTEAD_BUGS], + evaluate: |m| m.get(&HALSTEAD_BUGS).copied().unwrap_or(f64::INFINITY), +}; + +pub const MINIMIZE_BIND_LOW_TO_HIGH: ObjectiveFunction = ObjectiveFunction { + name: "minimize_bind_low_to_high", + inputs: &[BIND_LOW_TO_HIGH], + evaluate: |m| m.get(&BIND_LOW_TO_HIGH).copied().unwrap_or(f64::INFINITY), +}; + +pub const MINIMIZE_BIND_HIGH_TO_LOW: ObjectiveFunction = ObjectiveFunction { + name: "minimize_bind_high_to_low", + inputs: &[BIND_HIGH_TO_LOW], + evaluate: 
|m| m.get(&BIND_HIGH_TO_LOW).copied().unwrap_or(f64::INFINITY), +}; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn minimize_lloc_evaluates() { + let mut m = HashMap::new(); + m.insert(LLOC, 5000.0); + assert_eq!((MINIMIZE_LLOC.evaluate)(&m), 5000.0); + } + + #[test] + fn missing_input_returns_infinity() { + let m = HashMap::new(); + assert_eq!((MINIMIZE_LLOC.evaluate)(&m), f64::INFINITY); + } + + #[test] + fn by_name_finds_registered() { + let f = ObjectiveFunction::by_name("minimize_lloc").unwrap(); + assert_eq!(f.name, "minimize_lloc"); + } + + #[test] + fn by_name_returns_none_for_unknown() { + assert!(ObjectiveFunction::by_name("nonexistent").is_none()); + } + + #[test] + fn all_returns_expected_count() { + assert_eq!(ObjectiveFunction::all().len(), 5); + } + + #[test] + fn custom_composite_objective() { + const INPUTS: &[OptimizationObjective] = &[LLOC, HALSTEAD_BUGS]; + let weighted = ObjectiveFunction { + name: "weighted", + inputs: INPUTS, + evaluate: |m| { + 2.0 * m.get(&LLOC).unwrap_or(&0.0) + m.get(&HALSTEAD_BUGS).unwrap_or(&0.0) + }, + }; + + let mut m = HashMap::new(); + m.insert(LLOC, 10.0); + m.insert(HALSTEAD_BUGS, 100.0); + assert_eq!((weighted.evaluate)(&m), 120.0); + } +} diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index 43ce3fee6..3981c33f2 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -3,34 +3,8 @@ use std::path::Path; use crate::agent::{truncate, AgentHarness, DiffScope}; -/// A function that combines raw objective measurements into a single -/// scalar value to minimize. -/// -/// The optimizer always minimizes. To maximize something, negate it -/// in your implementation. -pub trait ObjectiveFunction: Send + Sync { - /// Human-readable description of what this function optimizes, - /// included in the agent prompt. - fn description(&self) -> String; - - /// Combine raw measurements into a single scalar to minimize. 
- fn evaluate(&self, measurements: &HashMap) -> f64; -} - -/// A simple objective function that returns a single named measurement. -pub struct SingleObjective { - pub name: String, -} - -impl ObjectiveFunction for SingleObjective { - fn description(&self) -> String { - format!("Minimize {}", self.name) - } - - fn evaluate(&self, measurements: &HashMap) -> f64 { - measurements.get(&self.name).copied().unwrap_or(f64::INFINITY) - } -} +use super::objective_fn::ObjectiveFunction; +use super::OptimizationObjective; /// Configuration for an optimization run. pub struct OptimizeConfig { @@ -52,22 +26,22 @@ pub struct OptimizeResult { pub attempts: Vec, pub baseline_score: f64, pub best_score: f64, - pub best_measurements: HashMap, + pub best_measurements: HashMap, } /// Record of a single optimization attempt. pub struct OptimizationAttempt { pub description: String, pub diff: String, - pub measurements: HashMap, + pub measurements: HashMap, pub score: f64, pub invariants_passed: bool, } /// Environment trait that decouples the optimization loop from side effects. pub trait OptimizeEnv { - /// Measure all raw objectives. Returns name -> value. - fn measure(&mut self) -> HashMap; + /// Measure all raw objectives. Returns objective -> value. + fn measure(&mut self) -> HashMap; /// Check all invariants. Returns `true` if they all pass. 
fn check_invariants(&mut self) -> bool; @@ -89,12 +63,12 @@ pub trait OptimizeEnv { pub fn auto_optimize( agent: &A, env: &mut E, - objective: &dyn ObjectiveFunction, + objective: &ObjectiveFunction, config: &OptimizeConfig, repo_dir: &Path, ) -> OptimizeResult { let baseline = env.measure(); - let baseline_score = objective.evaluate(&baseline); + let baseline_score = (objective.evaluate)(&baseline); let mut best_score = baseline_score; let mut best_measurements = baseline.clone(); let mut attempts = Vec::new(); @@ -129,7 +103,7 @@ pub fn auto_optimize( }; let new_measurements = env.measure(); - let new_score = objective.evaluate(&new_measurements); + let new_score = (objective.evaluate)(&new_measurements); let invariants_passed = env.check_invariants(); if !invariants_passed { @@ -165,9 +139,9 @@ pub fn auto_optimize( } fn build_optimize_prompt( - objective: &dyn ObjectiveFunction, + objective: &ObjectiveFunction, current_best_score: f64, - current_best_measurements: &HashMap, + current_best_measurements: &HashMap, past_attempts: &[OptimizationAttempt], hint: Option<&str>, ) -> String { @@ -178,18 +152,30 @@ fn build_optimize_prompt( Your goal is to make code changes that MINIMIZE the objective function.\n\n", ); - prompt.push_str("## Objective function\n\n"); - prompt.push_str(&objective.description()); + prompt.push_str("## Objective\n\n"); + prompt.push_str(&format!("Minimize: **{}**\n", objective.name)); + + let inputs = objective.inputs; + prompt.push_str("Inputs: "); + for (i, input) in inputs.iter().enumerate() { + if i > 0 { + prompt.push_str(", "); + } + prompt.push_str(input.name()); + } prompt.push_str(&format!( - "\n\nCurrent best score: {current_best_score:.6}\n\n" + "\nCurrent best score: {current_best_score:.6}\n\n" )); + prompt.push_str( + "The objective function is defined in `jolt-eval/src/objective/objective_fn/`. 
\ + Read the implementation to understand exactly what you are optimizing.\n\n", + ); prompt.push_str("## Current measurements\n\n"); - let mut names: Vec<_> = current_best_measurements.keys().collect(); - names.sort(); - for name in &names { - let val = current_best_measurements[*name]; - prompt.push_str(&format!("- **{name}**: {val:.6}\n")); + let mut entries: Vec<_> = current_best_measurements.iter().collect(); + entries.sort_by_key(|(k, _)| k.name()); + for (key, val) in &entries { + prompt.push_str(&format!("- **{}**: {val:.6}\n", key.name())); } prompt.push('\n'); @@ -223,11 +209,10 @@ fn build_optimize_prompt( "- **{}** ({}, score={:.6}): ", attempt.description, status, attempt.score )); - let mut keys: Vec<_> = attempt.measurements.keys().collect(); - keys.sort(); - for name in keys { - let val = attempt.measurements[name]; - prompt.push_str(&format!("{name}={val:.6} ")); + let mut keys: Vec<_> = attempt.measurements.iter().collect(); + keys.sort_by_key(|(k, _)| k.name()); + for (key, val) in keys { + prompt.push_str(&format!("{}={val:.6} ", key.name())); } prompt.push('\n'); } diff --git a/jolt-eval/src/objective/performance/binding.rs b/jolt-eval/src/objective/performance/binding.rs index 9e9582392..37bb3babd 100644 --- a/jolt-eval/src/objective/performance/binding.rs +++ b/jolt-eval/src/objective/performance/binding.rs @@ -3,20 +3,22 @@ use jolt_core::field::JoltField; use jolt_core::poly::dense_mlpoly::DensePolynomial; use jolt_core::poly::multilinear_polynomial::BindingOrder; -use crate::objective::PerfObjective; +use crate::objective::{Objective, OptimizationObjective, PerformanceObjective}; + +pub const BIND_LOW_TO_HIGH: OptimizationObjective = + OptimizationObjective::Performance(PerformanceObjective::BindLowToHigh(BindLowToHighObjective)); +pub const BIND_HIGH_TO_LOW: OptimizationObjective = + OptimizationObjective::Performance(PerformanceObjective::BindHighToLow(BindHighToLowObjective)); type Challenge = ::Challenge; -/// Number of variables for 
the benchmark polynomial (2^NUM_VARS evaluations). const NUM_VARS: usize = 20; -/// Per-iteration state: a fresh polynomial and a challenge to bind with. pub struct BindSetup { pub poly: DensePolynomial, pub challenge: Challenge, } -/// Shared state used to produce per-iteration setups cheaply. struct BindShared { evals: Vec, challenge: Challenge, @@ -26,9 +28,7 @@ impl BindShared { fn new() -> Self { let mut rng = rand::thread_rng(); Self { - evals: (0..1 << NUM_VARS) - .map(|_| Fr::random(&mut rng)) - .collect(), + evals: (0..1 << NUM_VARS).map(|_| Fr::random(&mut rng)).collect(), challenge: Challenge::random(&mut rng), } } @@ -42,14 +42,14 @@ impl BindShared { } /// Benchmark `DensePolynomial::bind_parallel` with `LowToHigh` binding. -#[derive(Default)] +#[derive(Clone, Copy, Default)] pub struct BindLowToHighObjective; impl BindLowToHighObjective { pub const NAME: &str = "bind_parallel_low_to_high"; } -impl PerfObjective for BindLowToHighObjective { +impl Objective for BindLowToHighObjective { type Setup = BindSetup; fn name(&self) -> &str { @@ -57,7 +57,6 @@ impl PerfObjective for BindLowToHighObjective { } fn setup(&self) -> BindSetup { - // Thread-local shared state so we only generate random evals once. thread_local! { static SHARED: BindShared = BindShared::new(); } @@ -70,17 +69,21 @@ impl PerfObjective for BindLowToHighObjective { .bind_parallel(setup.challenge, BindingOrder::LowToHigh); std::hint::black_box(&setup.poly); } + + fn units(&self) -> Option<&str> { + Some("s") + } } /// Benchmark `DensePolynomial::bind_parallel` with `HighToLow` binding. 
-#[derive(Default)] +#[derive(Clone, Copy, Default)] pub struct BindHighToLowObjective; impl BindHighToLowObjective { pub const NAME: &str = "bind_parallel_high_to_low"; } -impl PerfObjective for BindHighToLowObjective { +impl Objective for BindHighToLowObjective { type Setup = BindSetup; fn name(&self) -> &str { @@ -100,6 +103,10 @@ impl PerfObjective for BindHighToLowObjective { .bind_parallel(setup.challenge, BindingOrder::HighToLow); std::hint::black_box(&setup.poly); } + + fn units(&self) -> Option<&str> { + Some("s") + } } #[cfg(test)] diff --git a/jolt-eval/src/objective/performance/prover_time.rs b/jolt-eval/src/objective/performance/prover_time.rs index 5de06068e..92031c07c 100644 --- a/jolt-eval/src/objective/performance/prover_time.rs +++ b/jolt-eval/src/objective/performance/prover_time.rs @@ -1,7 +1,7 @@ use jolt_core::host::Program; use crate::guests::{self, GuestConfig, GuestProgram, ProverPreprocessing}; -use crate::objective::PerfObjective; +use crate::objective::Objective; /// Per-iteration state: everything needed to call `prove`. pub struct ProverTimeSetup { @@ -25,7 +25,7 @@ impl ProverTimeObjective { } } -impl PerfObjective for ProverTimeObjective { +impl Objective for ProverTimeObjective { type Setup = ProverTimeSetup; fn name(&self) -> &str { @@ -71,6 +71,10 @@ impl PerfObjective for ProverTimeObjective { let (_proof, _io) = guests::prove(&setup.program, &setup.prover_pp, &setup.input); std::hint::black_box(()); } + + fn units(&self) -> Option<&str> { + Some("s") + } } #[cfg(test)] diff --git a/jolt-eval/src/objective/synthesis.rs b/jolt-eval/src/objective/synthesis.rs index 2b0680ead..00d2e035b 100644 --- a/jolt-eval/src/objective/synthesis.rs +++ b/jolt-eval/src/objective/synthesis.rs @@ -1,4 +1,4 @@ -/// Macro that generates a Criterion benchmark harness for a `PerfObjective`. +/// Macro that generates a Criterion benchmark harness for an [`Objective`]. 
/// /// Uses `iter_batched` with `BatchSize::LargeInput` so that per-iteration /// setup (e.g. polynomial clone) is excluded from the measurement. @@ -19,7 +19,7 @@ macro_rules! bench_objective { // Expression form with config methods ($obj_expr:expr, config: $($method:ident($($arg:expr),*)),* $(,)?) => { - use $crate::PerfObjective as _; + use $crate::Objective as _; fn __bench(c: &mut ::criterion::Criterion) { let obj = $obj_expr; @@ -43,7 +43,7 @@ macro_rules! bench_objective { // Simple form: just a type (uses Default + default Criterion config) ($obj_ty:ty) => { - use $crate::PerfObjective as _; + use $crate::Objective as _; fn __bench(c: &mut ::criterion::Criterion) { let obj = <$obj_ty>::default(); From 02155d4ee0d42526638c55f166aedd324fa5444d Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 20:28:28 -0400 Subject: [PATCH 53/86] style(jolt-eval): cargo fmt Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/redteam.rs | 8 ++--- jolt-eval/src/agent/claude.rs | 3 +- jolt-eval/src/guests/secp256k1_ecdsa.rs | 4 ++- jolt-eval/src/invariant/mod.rs | 36 ++++++++++++++------ jolt-eval/src/invariant/synthesis/redteam.rs | 22 ++++++------ 5 files changed, 43 insertions(+), 30 deletions(-) diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index 2c59d7a67..10a5d4e64 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -76,12 +76,8 @@ fn main() -> eyre::Result<()> { ); let result = match invariant { - JoltInvariants::SplitEqBindLowHigh(inv) => { - auto_redteam(inv, &config, &agent, &repo_dir) - } - JoltInvariants::SplitEqBindHighLow(inv) => { - auto_redteam(inv, &config, &agent, &repo_dir) - } + JoltInvariants::SplitEqBindLowHigh(inv) => auto_redteam(inv, &config, &agent, &repo_dir), + JoltInvariants::SplitEqBindHighLow(inv) => auto_redteam(inv, &config, &agent, &repo_dir), JoltInvariants::Soundness(inv) => auto_redteam(inv, &config, &agent, &repo_dir), }; diff --git a/jolt-eval/src/agent/claude.rs 
b/jolt-eval/src/agent/claude.rs index e60ea1cc3..30c249b4a 100644 --- a/jolt-eval/src/agent/claude.rs +++ b/jolt-eval/src/agent/claude.rs @@ -180,8 +180,7 @@ impl AgentHarness for ClaudeCodeAgent { /// filtered by the given [`DiffScope`]. fn capture_diff(worktree_dir: &Path, scope: &DiffScope) -> Option { let mut cmd = Command::new("git"); - cmd.current_dir(worktree_dir) - .args(["diff", "HEAD", "--"]); + cmd.current_dir(worktree_dir).args(["diff", "HEAD", "--"]); match scope { DiffScope::All => {} DiffScope::Include(paths) => { diff --git a/jolt-eval/src/guests/secp256k1_ecdsa.rs b/jolt-eval/src/guests/secp256k1_ecdsa.rs index 2500fadab..e4675921e 100644 --- a/jolt-eval/src/guests/secp256k1_ecdsa.rs +++ b/jolt-eval/src/guests/secp256k1_ecdsa.rs @@ -51,7 +51,9 @@ impl GuestConfig for Secp256k1EcdsaVerify { "secp256k1-ecdsa-verify-guest" } fn memory_config(&self) -> MemoryConfig { - use common::constants::{DEFAULT_MAX_TRUSTED_ADVICE_SIZE, DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE}; + use common::constants::{ + DEFAULT_MAX_TRUSTED_ADVICE_SIZE, DEFAULT_MAX_UNTRUSTED_ADVICE_SIZE, + }; MemoryConfig { max_input_size: 4096, max_output_size: 4096, diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index b6f780872..1b9739f43 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -1,8 +1,8 @@ +#[cfg(test)] +mod macro_tests; pub mod soundness; pub mod split_eq_bind; pub mod synthesis; -#[cfg(test)] -mod macro_tests; use std::fmt; @@ -266,11 +266,19 @@ mod integration_tests { impl Invariant for TrivialInvariant { type Setup = (); type Input = u8; - fn name(&self) -> &str { "trivial" } - fn description(&self) -> String { "Always passes".into() } + fn name(&self) -> &str { + "trivial" + } + fn description(&self) -> String { + "Always passes".into() + } fn setup(&self) {} - fn check(&self, _: &(), _: u8) -> Result<(), CheckError> { Ok(()) } - fn seed_corpus(&self) -> Vec { vec![0, 1, 255] } + fn check(&self, _: &(), _: u8) -> 
Result<(), CheckError> { + Ok(()) + } + fn seed_corpus(&self) -> Vec { + vec![0, 1, 255] + } } struct FailingInvariant; @@ -279,13 +287,21 @@ mod integration_tests { impl Invariant for FailingInvariant { type Setup = (); type Input = u8; - fn name(&self) -> &str { "failing" } - fn description(&self) -> String { "Always fails".into() } + fn name(&self) -> &str { + "failing" + } + fn description(&self) -> String { + "Always fails".into() + } fn setup(&self) {} fn check(&self, _: &(), input: u8) -> Result<(), CheckError> { - Err(CheckError::Violation(InvariantViolation::new(format!("failed for input {input}")))) + Err(CheckError::Violation(InvariantViolation::new(format!( + "failed for input {input}" + )))) + } + fn seed_corpus(&self) -> Vec { + vec![42] } - fn seed_corpus(&self) -> Vec { vec![42] } } #[test] diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index 9b4488691..5a6635f0b 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -66,17 +66,17 @@ pub fn auto_redteam( let diff_scope = DiffScope::Include(vec!["jolt-eval/guest-sandbox/".into()]); let response = match agent.invoke_structured(repo_dir, &prompt, &envelope_schema, &diff_scope) { - Ok(r) => r, - Err(e) => { - tracing::warn!("Agent invocation failed: {e}"); - failed_attempts.push(FailedAttempt { - description: format!("Iteration {}", iteration + 1), - approach: "Agent invocation failed".to_string(), - failure_reason: e.to_string(), - }); - continue; - } - }; + Ok(r) => r, + Err(e) => { + tracing::warn!("Agent invocation failed: {e}"); + failed_attempts.push(FailedAttempt { + description: format!("Iteration {}", iteration + 1), + approach: "Agent invocation failed".to_string(), + failure_reason: e.to_string(), + }); + continue; + } + }; let (analysis, counterexample_json) = match parse_envelope(&response.text) { Some(pair) => pair, From 94dba4b0f697b8d61f892e0a559aaa4911528e7d Mon Sep 17 
00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 21:18:16 -0400 Subject: [PATCH 54/86] feat(jolt-eval): add sorting e2e tests and --test/--list CLI flags - Add sort_e2e module with naive_sort (optimization target) and candidate_sort (subtly buggy red-team target) - CandidateSortInvariant with #[invariant(RedTeam)] checks sort correctness; bug triggers only for arrays > 16 elements - SortOptimizeEnv measures real wall-clock sort time, simulates optimization by swapping to stdlib sort on apply_diff - MockAgent tests: redteam finds violation, optimization improves, broken optimization rejected by invariant check - #[ignore] tests for ClaudeCodeAgent (real API, run manually) - Add --test flag to redteam and optimize binaries (mutually exclusive with --invariant/--objective) to run e2e sort targets - Make --invariant and --objective optional; --list works standalone Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/optimize.rs | 19 +- jolt-eval/bin/redteam.rs | 40 ++- jolt-eval/src/invariant/mod.rs | 1 + jolt-eval/src/invariant/sort_e2e.rs | 454 ++++++++++++++++++++++++++++ 4 files changed, 497 insertions(+), 17 deletions(-) create mode 100644 jolt-eval/src/invariant/sort_e2e.rs diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 4591e9002..329a8e436 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -5,6 +5,7 @@ use std::process::Command; use clap::Parser; use jolt_eval::agent::ClaudeCodeAgent; +use jolt_eval::invariant::sort_e2e; use jolt_eval::invariant::JoltInvariants; use jolt_eval::objective::objective_fn::ObjectiveFunction; use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; @@ -14,11 +15,14 @@ use jolt_eval::objective::{OptimizationObjective, PerformanceObjective, StaticAn #[command(name = "optimize")] #[command(about = "AI-driven optimization of Jolt codebase objectives")] struct Cli { - /// Objective function to minimize. 
- /// Run with --list to see available functions. - #[arg(long)] + /// Objective function to minimize (mutually exclusive with --test). + #[arg(long, conflicts_with = "test")] objective: Option, + /// Run the built-in e2e sort optimization test. + #[arg(long, conflicts_with = "objective")] + test: bool, + /// List all available objective functions and exit. #[arg(long)] list: bool, @@ -129,13 +133,20 @@ fn main() -> eyre::Result<()> { let inputs: Vec<_> = f.inputs.iter().map(|i| i.name().to_string()).collect(); println!(" {:<35} inputs: {}", f.name, inputs.join(", ")); } + println!("\nBuilt-in e2e targets (use --test):"); + println!(" naive_sort"); + return Ok(()); + } + + if cli.test { + sort_e2e::run_optimize_test(&cli.model, cli.max_turns, cli.iterations, cli.hint); return Ok(()); } let objective_name = cli .objective .as_deref() - .expect("--objective is required (use --list to see options)"); + .expect("--objective or --test is required (use --list to see options)"); let objective = ObjectiveFunction::by_name(objective_name).unwrap_or_else(|| { eprintln!("Unknown objective function: {objective_name}"); diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index 10a5d4e64..3bcab3bae 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -2,6 +2,7 @@ use clap::Parser; use tracing::info; use jolt_eval::agent::ClaudeCodeAgent; +use jolt_eval::invariant::sort_e2e; use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; use jolt_eval::invariant::{JoltInvariants, SynthesisTarget}; @@ -9,9 +10,17 @@ use jolt_eval::invariant::{JoltInvariants, SynthesisTarget}; #[command(name = "redteam")] #[command(about = "AI-driven red team testing of Jolt invariants")] struct Cli { - /// Name of the invariant to test + /// Name of the invariant to test (mutually exclusive with --test). + #[arg(long, conflicts_with = "test")] + invariant: Option, + + /// Run the built-in e2e sort test instead of a named invariant. 
+ #[arg(long, conflicts_with = "invariant")] + test: bool, + + /// List all red-teamable invariants and exit. #[arg(long)] - invariant: String, + list: bool, /// Number of red-team iterations #[arg(long, default_value = "10")] @@ -28,10 +37,6 @@ struct Cli { /// Extra context or guidance for the red-team agent #[arg(long)] hint: Option, - - /// List available red-teamable invariants and exit - #[arg(long)] - list: bool, } fn main() -> eyre::Result<()> { @@ -45,20 +50,29 @@ fn main() -> eyre::Result<()> { println!(" {}", inv.name()); } } + println!("\nBuilt-in e2e targets (use --test):"); + println!(" candidate_sort"); return Ok(()); } + if cli.test { + sort_e2e::run_redteam_test(&cli.model, cli.max_turns, cli.iterations, cli.hint); + return Ok(()); + } + + let invariant_name = cli + .invariant + .as_deref() + .expect("--invariant or --test is required (use --list to see options)"); + let all = JoltInvariants::all(); let invariant = all .iter() .filter(|inv| inv.targets().contains(SynthesisTarget::RedTeam)) - .find(|inv| inv.name() == cli.invariant.as_str()); + .find(|inv| inv.name() == invariant_name); let Some(invariant) = invariant else { - eprintln!( - "Invariant '{}' not found or not red-teamable.", - cli.invariant - ); + eprintln!("Invariant '{invariant_name}' not found or not red-teamable."); eprintln!("Run with --list to see available invariants."); std::process::exit(1); }; @@ -71,8 +85,8 @@ fn main() -> eyre::Result<()> { let repo_dir = std::env::current_dir()?; info!( - "Starting red team: invariant={}, iterations={}, model={}", - cli.invariant, cli.iterations, cli.model + "Starting red team: invariant={invariant_name}, iterations={}, model={}", + cli.iterations, cli.model ); let result = match invariant { diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index 1b9739f43..bd28a94d8 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -1,5 +1,6 @@ #[cfg(test)] mod macro_tests; +pub mod sort_e2e; pub 
mod soundness; pub mod split_eq_bind; pub mod synthesis; diff --git a/jolt-eval/src/invariant/sort_e2e.rs b/jolt-eval/src/invariant/sort_e2e.rs new file mode 100644 index 000000000..bdfb6fd5e --- /dev/null +++ b/jolt-eval/src/invariant/sort_e2e.rs @@ -0,0 +1,454 @@ +//! End-to-end test harnesses for the optimization and red-team loops, +//! using simple sorting functions as the target domain. + +use std::collections::HashMap; + +use super::{CheckError, Invariant, InvariantViolation}; +use crate::agent::ClaudeCodeAgent; +use crate::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; +use crate::objective::objective_fn::ObjectiveFunction; +use crate::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; +use crate::objective::{OptimizationObjective, LLOC}; + +/// Naive bubble sort — the optimization target. +/// Intentionally O(n²) so a "smarter" sort is measurably faster. +pub fn naive_sort(data: &mut [i32]) { + let n = data.len(); + for i in 0..n { + for j in 0..n.saturating_sub(1 + i) { + if data[j] > data[j + 1] { + data.swap(j, j + 1); + } + } + } +} + +/// A sorting routine used as a red-team target. +pub fn candidate_sort(data: &mut [i32]) { + if data.len() <= 16 { + // Small-array path: insertion sort. + for i in 1..data.len() { + let key = data[i]; + let mut j = i; + while j > 0 && data[j - 1] > key { + data[j] = data[j - 1]; + j -= 1; + } + data[j] = key; + } + } else { + // Large-array path: delegate to an optimized routine. + let last = data.len() - 1; + data[..last].sort(); + } +} + +// ── Red-team invariant ────────────────────────────────────────────── + +/// Invariant: a sort function must preserve all elements (multiset +/// equality) and produce sorted output. 
+#[jolt_eval_macros::invariant(RedTeam)] +pub struct CandidateSortInvariant; + +impl Invariant for CandidateSortInvariant { + type Setup = (); + type Input = Vec; + + fn name(&self) -> &str { + "candidate_sort" + } + + fn description(&self) -> String { + "The sort function `candidate_sort` in \ + jolt-eval/src/invariant/sort_e2e.rs must return a \ + permutation of its input in non-decreasing order. \ + Any dropped, duplicated, or misplaced elements are a violation." + .to_string() + } + + fn setup(&self) {} + + fn check(&self, _: &(), input: Vec) -> Result<(), CheckError> { + if input.len() > 1_000 { + return Err(CheckError::InvalidInput( + "input too large (max 1000)".into(), + )); + } + + let mut output = input.clone(); + candidate_sort(&mut output); + + let mut expected = input; + expected.sort(); + + if output != expected { + return Err(CheckError::Violation(InvariantViolation::new(format!( + "sort incorrect: expected {expected:?}, got {output:?}" + )))); + } + + Ok(()) + } + + fn seed_corpus(&self) -> Vec> { + vec![ + vec![], + vec![1], + vec![3, 1, 2], + vec![5, 4, 3, 2, 1], + vec![1, 1, 1], + ] + } +} + +/// Invariant for the naive (correct) sort — used in the optimization +/// loop to verify that the "optimized" sort is still correct. +pub struct NaiveSortInvariant; + +impl Invariant for NaiveSortInvariant { + type Setup = (); + type Input = Vec; + + fn name(&self) -> &str { + "naive_sort_correctness" + } + + fn description(&self) -> String { + "The naive sort must return a permutation of its input in \ + non-decreasing order." 
+ .to_string() + } + + fn setup(&self) {} + + fn check(&self, _: &(), input: Vec) -> Result<(), CheckError> { + let mut output = input.clone(); + naive_sort(&mut output); + + let mut expected = input; + expected.sort(); + + if output != expected { + return Err(CheckError::Violation(InvariantViolation::new(format!( + "naive sort incorrect: expected {expected:?}, got {output:?}" + )))); + } + + Ok(()) + } + + fn seed_corpus(&self) -> Vec> { + vec![vec![], vec![1], vec![3, 1, 2], vec![5, 4, 3, 2, 1]] + } +} + +// ── SortOptimizeEnv ───────────────────────────────────────────────── + +/// An [`OptimizeEnv`] that measures wall-clock time of a sort function. +/// `apply_diff` simulates optimization by swapping to `slice::sort`. +pub struct SortOptimizeEnv { + sort_fn: fn(&mut [i32]), + data: Vec, + invariant_ok: bool, +} + +impl SortOptimizeEnv { + pub fn new(data_size: usize) -> Self { + let data: Vec = (0..data_size as i32).rev().collect(); + Self { + sort_fn: naive_sort, + data, + invariant_ok: true, + } + } +} + +impl OptimizeEnv for SortOptimizeEnv { + fn measure(&mut self) -> HashMap { + let mut buf = self.data.clone(); + let start = std::time::Instant::now(); + (self.sort_fn)(&mut buf); + let elapsed = start.elapsed().as_secs_f64(); + + self.invariant_ok = buf.windows(2).all(|w| w[0] <= w[1]); + + let mut m = HashMap::new(); + m.insert(LLOC, elapsed); + m + } + + fn check_invariants(&mut self) -> bool { + self.invariant_ok + } + + fn apply_diff(&mut self, _diff: &str) { + self.sort_fn = |d: &mut [i32]| d.sort(); + } + + fn accept(&mut self, _iteration: usize) {} + + fn reject(&mut self) { + self.sort_fn = naive_sort; + } +} + +// ── CLI-accessible e2e runners ────────────────────────────────────── + +/// Run the red-team e2e test against `CandidateSortInvariant`. 
+pub fn run_redteam_test(model: &str, max_turns: usize, iterations: usize, hint: Option) { + let invariant = CandidateSortInvariant; + let agent = ClaudeCodeAgent::new(model, max_turns); + let repo_dir = std::env::current_dir().expect("current dir"); + let config = RedTeamConfig { + num_iterations: iterations, + hint, + }; + + println!("=== Red-team e2e: candidate_sort ==="); + println!("model={model}, max_turns={max_turns}, iterations={iterations}"); + println!(); + + let result = auto_redteam(&invariant, &config, &agent, &repo_dir); + + match &result { + RedTeamResult::Violation { + approach, + input_json, + error, + } => { + println!("VIOLATION FOUND"); + println!(" Approach: {approach}"); + println!(" Input: {input_json}"); + println!(" Error: {error}"); + } + RedTeamResult::NoViolation { attempts } => { + println!("No violation found after {} attempts.", attempts.len()); + for a in attempts { + println!( + " {}: {} -- {}", + a.description, a.approach, a.failure_reason + ); + } + } + } +} + +/// Run the optimization e2e test against the naive bubble sort. 
+pub fn run_optimize_test(model: &str, max_turns: usize, iterations: usize, hint: Option) { + let agent = ClaudeCodeAgent::new(model, max_turns); + let repo_dir = std::env::current_dir().expect("current dir"); + + let mut env = SortOptimizeEnv::new(5000); + + let baseline = env.measure(); + let baseline_time = baseline[&LLOC]; + env.sort_fn = naive_sort; + + let obj = ObjectiveFunction { + name: "sort_time", + inputs: &[LLOC], + evaluate: |m| m.get(&LLOC).copied().unwrap_or(f64::INFINITY), + }; + let config = OptimizeConfig { + num_iterations: iterations, + hint, + }; + + println!("=== Optimize e2e: naive bubble sort ==="); + println!("model={model}, max_turns={max_turns}, iterations={iterations}"); + println!("Baseline sort time: {baseline_time:.6}s"); + println!(); + + let result = auto_optimize(&agent, &mut env, &obj, &config, &repo_dir); + + println!("Best score: {:.6}s", result.best_score); + println!( + "Improvement: {:.1}%", + (1.0 - result.best_score / baseline_time) * 100.0 + ); + for (i, a) in result.attempts.iter().enumerate() { + println!( + " attempt {}: score={:.6}, invariants={}", + i + 1, + a.score, + a.invariants_passed + ); + } +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + use crate::agent::{AgentResponse, MockAgent}; + use crate::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; + use crate::objective::optimize::OptimizeEnv; + + use super::*; + + // ── Red-team e2e (MockAgent) ──────────────────────────────────── + + #[test] + fn redteam_e2e_finds_sort_violation() { + let invariant = CandidateSortInvariant; + + // 17 elements (exceeds the small-array threshold), with the + // minimum value at the end — triggers the bug. 
+ let bad_input: Vec = (1..=17).rev().collect(); + let response = serde_json::json!({ + "analysis": "Trying a reversed sequence of 17 elements.", + "counterexample": bad_input, + }); + let agent = MockAgent::always_ok(&response.to_string()); + let config = RedTeamConfig { + num_iterations: 5, + ..Default::default() + }; + + let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + match result { + RedTeamResult::Violation { error, .. } => { + assert!( + error.contains("sort incorrect"), + "unexpected error: {error}" + ); + } + RedTeamResult::NoViolation { .. } => { + panic!("Expected violation for large reversed input"); + } + } + + assert_eq!(agent.recorded_prompts().len(), 1); + } + + #[test] + fn redteam_e2e_no_violation_for_small_input() { + let invariant = CandidateSortInvariant; + + let response = serde_json::json!({ + "analysis": "Trying a small permutation.", + "counterexample": [5, 3, 1, 4, 2], + }); + let agent = MockAgent::always_ok(&response.to_string()); + let config = RedTeamConfig { + num_iterations: 3, + ..Default::default() + }; + + let result = auto_redteam(&invariant, &config, &agent, Path::new("/tmp")); + + match result { + RedTeamResult::NoViolation { attempts } => { + assert_eq!(attempts.len(), 3); + } + RedTeamResult::Violation { .. 
} => { + panic!("Small inputs should not trigger a violation"); + } + } + } + + // ── Red-team e2e (real agent) ─────────────────────────────────── + + #[test] + #[ignore] // Requires Claude API access + fn redteam_e2e_real_agent() { + run_redteam_test("claude-sonnet-4-20250514", 10, 5, None); + } + + // ── Optimize e2e (MockAgent) ──────────────────────────────────── + + #[test] + fn optimize_e2e_sort_improves() { + let agent = MockAgent::from_responses(vec![Ok(AgentResponse { + text: "Replaced bubble sort with merge sort".into(), + diff: Some("--- a/sort.rs\n+++ b/sort.rs\n-bubble\n+merge".into()), + })]); + + let mut env = SortOptimizeEnv::new(5000); + + let baseline = env.measure(); + let baseline_time = baseline[&LLOC]; + assert!(baseline_time > 0.0); + + env.sort_fn = naive_sort; + + let obj = ObjectiveFunction { + name: "sort_time", + inputs: &[LLOC], + evaluate: |m| m.get(&LLOC).copied().unwrap_or(f64::INFINITY), + }; + let config = OptimizeConfig { + num_iterations: 1, + hint: None, + }; + + let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); + + assert!( + result.best_score < baseline_time, + "expected improvement: baseline={baseline_time:.6}, best={:.6}", + result.best_score + ); + assert_eq!(result.attempts.len(), 1); + assert!(result.attempts[0].invariants_passed); + } + + #[test] + fn optimize_e2e_sort_rejects_broken_optimization() { + let agent = MockAgent::from_responses(vec![Ok(AgentResponse { + text: "Removed sorting entirely for speed".into(), + diff: Some("--- a/sort.rs\n+++ b/sort.rs\n-sort\n+noop".into()), + })]); + + let env = SortOptimizeEnv::new(100); + + struct BrokenSortEnv(SortOptimizeEnv); + + impl OptimizeEnv for BrokenSortEnv { + fn measure(&mut self) -> HashMap { + self.0.measure() + } + fn check_invariants(&mut self) -> bool { + self.0.check_invariants() + } + fn apply_diff(&mut self, _diff: &str) { + self.0.sort_fn = |d: &mut [i32]| { + if d.len() > 1 { + d.swap(0, d.len() - 1); + } + }; + } + fn 
accept(&mut self, i: usize) { + self.0.accept(i); + } + fn reject(&mut self) { + self.0.reject(); + } + } + + let mut broken_env = BrokenSortEnv(env); + + let obj = ObjectiveFunction { + name: "sort_time", + inputs: &[LLOC], + evaluate: |m| m.get(&LLOC).copied().unwrap_or(f64::INFINITY), + }; + let config = OptimizeConfig { + num_iterations: 1, + hint: None, + }; + + let result = auto_optimize(&agent, &mut broken_env, &obj, &config, Path::new("/tmp")); + + assert!(!result.attempts[0].invariants_passed); + } + + // ── Optimize e2e (real agent) ─────────────────────────────────── + + #[test] + #[ignore] // Requires Claude API access + fn optimize_e2e_real_agent() { + run_optimize_test("claude-sonnet-4-20250514", 10, 2, None); + } +} From 3fb739c4be562606d20a02cb4458761d752c3945 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 21:24:31 -0400 Subject: [PATCH 55/86] feat(jolt-eval): add --verbose flag to optimize and redteam binaries Prints agent prompts and responses to stderr when enabled. - Add verbose: bool to OptimizeConfig and RedTeamConfig - Print prompt/response per iteration in auto_optimize and auto_redteam - Add --verbose CLI flag to both binaries, threaded through --test path Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/optimize.rs | 13 ++++++++++- jolt-eval/bin/redteam.rs | 13 ++++++++++- jolt-eval/src/agent/tests.rs | 2 ++ jolt-eval/src/invariant/sort_e2e.rs | 24 ++++++++++++++++---- jolt-eval/src/invariant/synthesis/redteam.rs | 18 +++++++++++++++ jolt-eval/src/objective/optimize.rs | 20 ++++++++++++++++ 6 files changed, 84 insertions(+), 6 deletions(-) diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 329a8e436..f9135d768 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -42,6 +42,10 @@ struct Cli { /// Extra context to include in the optimization prompt #[arg(long)] hint: Option, + + /// Print agent prompts and responses to stderr. 
+ #[arg(long)] + verbose: bool, } struct RealEnv { @@ -139,7 +143,13 @@ fn main() -> eyre::Result<()> { } if cli.test { - sort_e2e::run_optimize_test(&cli.model, cli.max_turns, cli.iterations, cli.hint); + sort_e2e::run_optimize_test( + &cli.model, + cli.max_turns, + cli.iterations, + cli.hint, + cli.verbose, + ); return Ok(()); } @@ -177,6 +187,7 @@ fn main() -> eyre::Result<()> { let config = OptimizeConfig { num_iterations: cli.iterations, hint: cli.hint.clone(), + verbose: cli.verbose, }; let result = auto_optimize(&agent, &mut env, objective, &config, &repo_dir); diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index 3bcab3bae..dc17618df 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -37,6 +37,10 @@ struct Cli { /// Extra context or guidance for the red-team agent #[arg(long)] hint: Option, + + /// Print agent prompts and responses to stderr. + #[arg(long)] + verbose: bool, } fn main() -> eyre::Result<()> { @@ -56,7 +60,13 @@ fn main() -> eyre::Result<()> { } if cli.test { - sort_e2e::run_redteam_test(&cli.model, cli.max_turns, cli.iterations, cli.hint); + sort_e2e::run_redteam_test( + &cli.model, + cli.max_turns, + cli.iterations, + cli.hint, + cli.verbose, + ); return Ok(()); } @@ -80,6 +90,7 @@ fn main() -> eyre::Result<()> { let config = RedTeamConfig { num_iterations: cli.iterations, hint: cli.hint, + verbose: cli.verbose, }; let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); let repo_dir = std::env::current_dir()?; diff --git a/jolt-eval/src/agent/tests.rs b/jolt-eval/src/agent/tests.rs index 3f3dc1b44..d916e61c1 100644 --- a/jolt-eval/src/agent/tests.rs +++ b/jolt-eval/src/agent/tests.rs @@ -732,6 +732,7 @@ fn opt_config(iterations: usize) -> OptimizeConfig { OptimizeConfig { num_iterations: iterations, hint: None, + verbose: false, } } @@ -933,6 +934,7 @@ fn optimize_prompt_includes_measurements_and_hint() { let config = OptimizeConfig { num_iterations: 1, hint: Some("Focus on the inner loop".into()), 
+ verbose: false, }; let obj = lloc_obj(); auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); diff --git a/jolt-eval/src/invariant/sort_e2e.rs b/jolt-eval/src/invariant/sort_e2e.rs index bdfb6fd5e..ccc75df52 100644 --- a/jolt-eval/src/invariant/sort_e2e.rs +++ b/jolt-eval/src/invariant/sort_e2e.rs @@ -195,13 +195,20 @@ impl OptimizeEnv for SortOptimizeEnv { // ── CLI-accessible e2e runners ────────────────────────────────────── /// Run the red-team e2e test against `CandidateSortInvariant`. -pub fn run_redteam_test(model: &str, max_turns: usize, iterations: usize, hint: Option) { +pub fn run_redteam_test( + model: &str, + max_turns: usize, + iterations: usize, + hint: Option, + verbose: bool, +) { let invariant = CandidateSortInvariant; let agent = ClaudeCodeAgent::new(model, max_turns); let repo_dir = std::env::current_dir().expect("current dir"); let config = RedTeamConfig { num_iterations: iterations, hint, + verbose, }; println!("=== Red-team e2e: candidate_sort ==="); @@ -234,7 +241,13 @@ pub fn run_redteam_test(model: &str, max_turns: usize, iterations: usize, hint: } /// Run the optimization e2e test against the naive bubble sort. 
-pub fn run_optimize_test(model: &str, max_turns: usize, iterations: usize, hint: Option) { +pub fn run_optimize_test( + model: &str, + max_turns: usize, + iterations: usize, + hint: Option, + verbose: bool, +) { let agent = ClaudeCodeAgent::new(model, max_turns); let repo_dir = std::env::current_dir().expect("current dir"); @@ -252,6 +265,7 @@ pub fn run_optimize_test(model: &str, max_turns: usize, iterations: usize, hint: let config = OptimizeConfig { num_iterations: iterations, hint, + verbose, }; println!("=== Optimize e2e: naive bubble sort ==="); @@ -353,7 +367,7 @@ mod tests { #[test] #[ignore] // Requires Claude API access fn redteam_e2e_real_agent() { - run_redteam_test("claude-sonnet-4-20250514", 10, 5, None); + run_redteam_test("claude-sonnet-4-20250514", 10, 5, None, false); } // ── Optimize e2e (MockAgent) ──────────────────────────────────── @@ -381,6 +395,7 @@ mod tests { let config = OptimizeConfig { num_iterations: 1, hint: None, + verbose: false, }; let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); @@ -437,6 +452,7 @@ mod tests { let config = OptimizeConfig { num_iterations: 1, hint: None, + verbose: false, }; let result = auto_optimize(&agent, &mut broken_env, &obj, &config, Path::new("/tmp")); @@ -449,6 +465,6 @@ mod tests { #[test] #[ignore] // Requires Claude API access fn optimize_e2e_real_agent() { - run_optimize_test("claude-sonnet-4-20250514", 10, 2, None); + run_optimize_test("claude-sonnet-4-20250514", 10, 2, None, false); } } diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index 5a6635f0b..713c7a2f3 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -19,6 +19,7 @@ pub enum RedTeamResult { pub struct RedTeamConfig { pub num_iterations: usize, pub hint: Option, + pub verbose: bool, } impl Default for RedTeamConfig { @@ -26,6 +27,7 @@ impl Default for RedTeamConfig { Self { num_iterations: 10, 
hint: None, + verbose: false, } } } @@ -63,6 +65,12 @@ pub fn auto_redteam( &failed_attempts, ); + if config.verbose { + eprintln!("── Iteration {} prompt ──", iteration + 1); + eprintln!("{prompt}"); + eprintln!("────────────────────────"); + } + let diff_scope = DiffScope::Include(vec!["jolt-eval/guest-sandbox/".into()]); let response = match agent.invoke_structured(repo_dir, &prompt, &envelope_schema, &diff_scope) { @@ -78,6 +86,16 @@ pub fn auto_redteam( } }; + if config.verbose { + eprintln!("── Iteration {} response ──", iteration + 1); + eprintln!("{}", response.text); + if let Some(ref d) = response.diff { + eprintln!("── diff ({} bytes) ──", d.len()); + eprintln!("{d}"); + } + eprintln!("──────────────────────────"); + } + let (analysis, counterexample_json) = match parse_envelope(&response.text) { Some(pair) => pair, None => match super::super::extract_json(&response.text) { diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index 3981c33f2..c58446d4e 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -10,6 +10,7 @@ use super::OptimizationObjective; pub struct OptimizeConfig { pub num_iterations: usize, pub hint: Option, + pub verbose: bool, } impl Default for OptimizeConfig { @@ -17,6 +18,7 @@ impl Default for OptimizeConfig { Self { num_iterations: 5, hint: None, + verbose: false, } } } @@ -82,6 +84,12 @@ pub fn auto_optimize( config.hint.as_deref(), ); + if config.verbose { + eprintln!("── Iteration {} prompt ──", iteration + 1); + eprintln!("{prompt}"); + eprintln!("────────────────────────"); + } + let diff_scope = DiffScope::Exclude(vec!["jolt-eval/".into()]); let response = match agent.invoke(repo_dir, &prompt, &diff_scope) { Ok(r) => r, @@ -91,6 +99,18 @@ pub fn auto_optimize( } }; + if config.verbose { + eprintln!("── Iteration {} response ──", iteration + 1); + eprintln!("{}", response.text); + if let Some(ref d) = response.diff { + eprintln!("── diff ({} bytes) ──", 
d.len()); + eprintln!("{}", truncate(d, 2000)); + } else { + eprintln!("(no diff)"); + } + eprintln!("──────────────────────────"); + } + let diff_text = match &response.diff { Some(d) => { env.apply_diff(d); From 55cec38a7e0b2980c237e95f0bbdea0b4aa2c520 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 21:39:38 -0400 Subject: [PATCH 56/86] refactor(jolt-eval): add NAIVE_SORT_TIME objective for sort e2e tests Replace overloaded LLOC key with a dedicated NaiveSortTime variant in PerformanceObjective, so the optimize prompt correctly shows "naive_sort_time" instead of "lloc". Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/invariant/sort_e2e.rs | 26 +++++++++++++------------- jolt-eval/src/objective/mod.rs | 7 +++++++ 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/jolt-eval/src/invariant/sort_e2e.rs b/jolt-eval/src/invariant/sort_e2e.rs index ccc75df52..9dcc5afea 100644 --- a/jolt-eval/src/invariant/sort_e2e.rs +++ b/jolt-eval/src/invariant/sort_e2e.rs @@ -8,7 +8,7 @@ use crate::agent::ClaudeCodeAgent; use crate::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; use crate::objective::objective_fn::ObjectiveFunction; use crate::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; -use crate::objective::{OptimizationObjective, LLOC}; +use crate::objective::{OptimizationObjective, NAIVE_SORT_TIME}; /// Naive bubble sort — the optimization target. /// Intentionally O(n²) so a "smarter" sort is measurably faster. 
@@ -173,7 +173,7 @@ impl OptimizeEnv for SortOptimizeEnv { self.invariant_ok = buf.windows(2).all(|w| w[0] <= w[1]); let mut m = HashMap::new(); - m.insert(LLOC, elapsed); + m.insert(NAIVE_SORT_TIME, elapsed); m } @@ -254,13 +254,13 @@ pub fn run_optimize_test( let mut env = SortOptimizeEnv::new(5000); let baseline = env.measure(); - let baseline_time = baseline[&LLOC]; + let baseline_time = baseline[&NAIVE_SORT_TIME]; env.sort_fn = naive_sort; let obj = ObjectiveFunction { - name: "sort_time", - inputs: &[LLOC], - evaluate: |m| m.get(&LLOC).copied().unwrap_or(f64::INFINITY), + name: "naive_sort_time", + inputs: &[NAIVE_SORT_TIME], + evaluate: |m| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), }; let config = OptimizeConfig { num_iterations: iterations, @@ -382,15 +382,15 @@ mod tests { let mut env = SortOptimizeEnv::new(5000); let baseline = env.measure(); - let baseline_time = baseline[&LLOC]; + let baseline_time = baseline[&NAIVE_SORT_TIME]; assert!(baseline_time > 0.0); env.sort_fn = naive_sort; let obj = ObjectiveFunction { - name: "sort_time", - inputs: &[LLOC], - evaluate: |m| m.get(&LLOC).copied().unwrap_or(f64::INFINITY), + name: "naive_sort_time", + inputs: &[NAIVE_SORT_TIME], + evaluate: |m| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), }; let config = OptimizeConfig { num_iterations: 1, @@ -445,9 +445,9 @@ mod tests { let mut broken_env = BrokenSortEnv(env); let obj = ObjectiveFunction { - name: "sort_time", - inputs: &[LLOC], - evaluate: |m| m.get(&LLOC).copied().unwrap_or(f64::INFINITY), + name: "naive_sort_time", + inputs: &[NAIVE_SORT_TIME], + evaluate: |m| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), }; let config = OptimizeConfig { num_iterations: 1, diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 8d1e38f34..da726ff88 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -126,9 +126,12 @@ impl Hash for StaticAnalysisObjective { pub enum 
PerformanceObjective { BindLowToHigh(performance::binding::BindLowToHighObjective), BindHighToLow(performance::binding::BindHighToLowObjective), + /// Wall-clock time of `naive_sort` — used by the e2e sort test. + NaiveSortTime, } impl PerformanceObjective { + /// Criterion-benchmarked objectives (excludes test-only variants). pub fn all() -> Vec<Self> { vec![ Self::BindLowToHigh(performance::binding::BindLowToHighObjective), @@ -140,6 +143,7 @@ impl PerformanceObjective { match self { Self::BindLowToHigh(o) => o.name(), Self::BindHighToLow(o) => o.name(), + Self::NaiveSortTime => "naive_sort_time", } } @@ -147,6 +151,7 @@ impl PerformanceObjective { match self { Self::BindLowToHigh(o) => o.units(), Self::BindHighToLow(o) => o.units(), + Self::NaiveSortTime => Some("s"), } } } @@ -175,6 +180,8 @@ pub use code_quality::cognitive::COGNITIVE_COMPLEXITY; pub use code_quality::halstead_bugs::HALSTEAD_BUGS; pub use code_quality::lloc::LLOC; pub use performance::binding::{BIND_HIGH_TO_LOW, BIND_LOW_TO_HIGH}; +pub const NAIVE_SORT_TIME: OptimizationObjective = + OptimizationObjective::Performance(PerformanceObjective::NaiveSortTime); impl OptimizationObjective { pub fn all(root: &Path) -> Vec { From 86f32bd2cf0f3425a1960682a6a663d02eb6b2eb Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 21:50:35 -0400 Subject: [PATCH 57/86] fix(jolt-eval): pass --dangerously-skip-permissions to Claude CLI The agent runs non-interactively in a worktree, so it cannot prompt for permission approval. Also add a default hint for the optimize e2e sort test directing the agent to the correct file.
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/agent/claude.rs | 3 ++- jolt-eval/src/invariant/sort_e2e.rs | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/jolt-eval/src/agent/claude.rs b/jolt-eval/src/agent/claude.rs index 30c249b4a..0c4baf2c8 100644 --- a/jolt-eval/src/agent/claude.rs +++ b/jolt-eval/src/agent/claude.rs @@ -37,7 +37,8 @@ impl ClaudeCodeAgent { .arg("--model") .arg(&self.model) .arg("--max-turns") - .arg(self.max_turns.to_string()); + .arg(self.max_turns.to_string()) + .arg("--dangerously-skip-permissions"); if verbose { cmd.arg("--verbose"); } diff --git a/jolt-eval/src/invariant/sort_e2e.rs b/jolt-eval/src/invariant/sort_e2e.rs index 9dcc5afea..3741084df 100644 --- a/jolt-eval/src/invariant/sort_e2e.rs +++ b/jolt-eval/src/invariant/sort_e2e.rs @@ -262,9 +262,15 @@ pub fn run_optimize_test( inputs: &[NAIVE_SORT_TIME], evaluate: |m| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), }; + let hint = hint.unwrap_or_else(|| { + "The target is the `naive_sort` function in \ + jolt-eval/src/invariant/sort_e2e.rs. Replace it with a faster \ + sorting algorithm. You MAY modify jolt-eval/ for this task." + .into() + }); let config = OptimizeConfig { num_iterations: iterations, - hint, + hint: Some(hint), verbose, }; From 1c2cdcac4fe6f016849b892ae3d699db08c6cd8d Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 22:01:25 -0400 Subject: [PATCH 58/86] fix(jolt-eval): make diff_scope configurable in OptimizeConfig The optimize loop hardcoded DiffScope::Exclude(["jolt-eval/"]) which filtered out the agent's changes in the sort e2e test. Now diff_scope is a field on OptimizeConfig (defaulting to the jolt-eval exclusion), and the sort test overrides it with DiffScope::All. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/optimize.rs | 1 + jolt-eval/src/agent/tests.rs | 5 ++--- jolt-eval/src/invariant/sort_e2e.rs | 9 ++++----- jolt-eval/src/objective/optimize.rs | 5 +++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index f9135d768..034a03334 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -188,6 +188,7 @@ fn main() -> eyre::Result<()> { num_iterations: cli.iterations, hint: cli.hint.clone(), verbose: cli.verbose, + ..Default::default() }; let result = auto_optimize(&agent, &mut env, objective, &config, &repo_dir); diff --git a/jolt-eval/src/agent/tests.rs b/jolt-eval/src/agent/tests.rs index d916e61c1..dcf65ec5c 100644 --- a/jolt-eval/src/agent/tests.rs +++ b/jolt-eval/src/agent/tests.rs @@ -731,8 +731,7 @@ fn lloc_obj() -> ObjectiveFunction { fn opt_config(iterations: usize) -> OptimizeConfig { OptimizeConfig { num_iterations: iterations, - hint: None, - verbose: false, + ..Default::default() } } @@ -934,7 +933,7 @@ fn optimize_prompt_includes_measurements_and_hint() { let config = OptimizeConfig { num_iterations: 1, hint: Some("Focus on the inner loop".into()), - verbose: false, + ..Default::default() }; let obj = lloc_obj(); auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); diff --git a/jolt-eval/src/invariant/sort_e2e.rs b/jolt-eval/src/invariant/sort_e2e.rs index 3741084df..62249c81d 100644 --- a/jolt-eval/src/invariant/sort_e2e.rs +++ b/jolt-eval/src/invariant/sort_e2e.rs @@ -4,7 +4,7 @@ use std::collections::HashMap; use super::{CheckError, Invariant, InvariantViolation}; -use crate::agent::ClaudeCodeAgent; +use crate::agent::{ClaudeCodeAgent, DiffScope}; use crate::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; use crate::objective::objective_fn::ObjectiveFunction; use crate::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; @@ -272,6 +272,7 @@ pub fn 
run_optimize_test( num_iterations: iterations, hint: Some(hint), verbose, + diff_scope: DiffScope::All, }; println!("=== Optimize e2e: naive bubble sort ==="); @@ -400,8 +401,7 @@ mod tests { }; let config = OptimizeConfig { num_iterations: 1, - hint: None, - verbose: false, + ..Default::default() }; let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); @@ -457,8 +457,7 @@ mod tests { }; let config = OptimizeConfig { num_iterations: 1, - hint: None, - verbose: false, + ..Default::default() }; let result = auto_optimize(&agent, &mut broken_env, &obj, &config, Path::new("/tmp")); diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index c58446d4e..5513096be 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -11,6 +11,7 @@ pub struct OptimizeConfig { pub num_iterations: usize, pub hint: Option, pub verbose: bool, + pub diff_scope: DiffScope, } impl Default for OptimizeConfig { @@ -19,6 +20,7 @@ impl Default for OptimizeConfig { num_iterations: 5, hint: None, verbose: false, + diff_scope: DiffScope::Exclude(vec!["jolt-eval/".into()]), } } } @@ -90,8 +92,7 @@ pub fn auto_optimize( eprintln!("────────────────────────"); } - let diff_scope = DiffScope::Exclude(vec!["jolt-eval/".into()]); - let response = match agent.invoke(repo_dir, &prompt, &diff_scope) { + let response = match agent.invoke(repo_dir, &prompt, &config.diff_scope) { Ok(r) => r, Err(e) => { tracing::info!("Agent error: {e}"); From 2e24818ba271e8f56578d9af58928c0f38c11862 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 22:08:42 -0400 Subject: [PATCH 59/86] refactor(jolt-eval): move sort e2e out of invariant/, split targets - Move sort functions to src/sort_targets.rs (standalone file the agent can modify) - Move sort_e2e.rs from invariant/ to src/ (top-level module) - Restrict optimize e2e diff scope to just sort_targets.rs instead of DiffScope::All Co-Authored-By: Claude Opus 4.6 (1M 
context) --- jolt-eval/bin/optimize.rs | 2 +- jolt-eval/bin/redteam.rs | 2 +- jolt-eval/src/invariant/mod.rs | 1 - jolt-eval/src/lib.rs | 2 + jolt-eval/src/{invariant => }/sort_e2e.rs | 53 +++++------------------ jolt-eval/src/sort_targets.rs | 34 +++++++++++++++ 6 files changed, 50 insertions(+), 44 deletions(-) rename jolt-eval/src/{invariant => }/sort_e2e.rs (91%) create mode 100644 jolt-eval/src/sort_targets.rs diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 034a03334..9d9ef1999 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -5,11 +5,11 @@ use std::process::Command; use clap::Parser; use jolt_eval::agent::ClaudeCodeAgent; -use jolt_eval::invariant::sort_e2e; use jolt_eval::invariant::JoltInvariants; use jolt_eval::objective::objective_fn::ObjectiveFunction; use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; use jolt_eval::objective::{OptimizationObjective, PerformanceObjective, StaticAnalysisObjective}; +use jolt_eval::sort_e2e; #[derive(Parser)] #[command(name = "optimize")] diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index dc17618df..2b8979531 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -2,9 +2,9 @@ use clap::Parser; use tracing::info; use jolt_eval::agent::ClaudeCodeAgent; -use jolt_eval::invariant::sort_e2e; use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; use jolt_eval::invariant::{JoltInvariants, SynthesisTarget}; +use jolt_eval::sort_e2e; #[derive(Parser)] #[command(name = "redteam")] diff --git a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index bd28a94d8..1b9739f43 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -1,6 +1,5 @@ #[cfg(test)] mod macro_tests; -pub mod sort_e2e; pub mod soundness; pub mod split_eq_bind; pub mod synthesis; diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index bee3888e0..5b22ff82d 100644 --- 
a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -13,6 +13,8 @@ pub mod agent; pub mod guests; pub mod invariant; pub mod objective; +pub mod sort_e2e; +pub mod sort_targets; pub use guests::{GuestConfig, GuestProgram, JoltDevice, ProofVerifyError}; pub use invariant::{ diff --git a/jolt-eval/src/invariant/sort_e2e.rs b/jolt-eval/src/sort_e2e.rs similarity index 91% rename from jolt-eval/src/invariant/sort_e2e.rs rename to jolt-eval/src/sort_e2e.rs index 62249c81d..0b754b945 100644 --- a/jolt-eval/src/invariant/sort_e2e.rs +++ b/jolt-eval/src/sort_e2e.rs @@ -3,45 +3,13 @@ use std::collections::HashMap; -use super::{CheckError, Invariant, InvariantViolation}; use crate::agent::{ClaudeCodeAgent, DiffScope}; use crate::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; +use crate::invariant::{CheckError, Invariant, InvariantViolation}; use crate::objective::objective_fn::ObjectiveFunction; use crate::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; use crate::objective::{OptimizationObjective, NAIVE_SORT_TIME}; - -/// Naive bubble sort — the optimization target. -/// Intentionally O(n²) so a "smarter" sort is measurably faster. -pub fn naive_sort(data: &mut [i32]) { - let n = data.len(); - for i in 0..n { - for j in 0..n.saturating_sub(1 + i) { - if data[j] > data[j + 1] { - data.swap(j, j + 1); - } - } - } -} - -/// A sorting routine used as a red-team target. -pub fn candidate_sort(data: &mut [i32]) { - if data.len() <= 16 { - // Small-array path: insertion sort. - for i in 1..data.len() { - let key = data[i]; - let mut j = i; - while j > 0 && data[j - 1] > key { - data[j] = data[j - 1]; - j -= 1; - } - data[j] = key; - } - } else { - // Large-array path: delegate to an optimized routine. 
- let last = data.len() - 1; - data[..last].sort(); - } -} +use crate::sort_targets::{candidate_sort, naive_sort}; // ── Red-team invariant ────────────────────────────────────────────── @@ -60,7 +28,7 @@ impl Invariant for CandidateSortInvariant { fn description(&self) -> String { "The sort function `candidate_sort` in \ - jolt-eval/src/invariant/sort_e2e.rs must return a \ + jolt-eval/src/sort_targets.rs must return a \ permutation of its input in non-decreasing order. \ Any dropped, duplicated, or misplaced elements are a violation." .to_string() @@ -147,7 +115,7 @@ impl Invariant for NaiveSortInvariant { /// An [`OptimizeEnv`] that measures wall-clock time of a sort function. /// `apply_diff` simulates optimization by swapping to `slice::sort`. pub struct SortOptimizeEnv { - sort_fn: fn(&mut [i32]), + pub(crate) sort_fn: fn(&mut [i32]), data: Vec, invariant_ok: bool, } @@ -194,6 +162,8 @@ impl OptimizeEnv for SortOptimizeEnv { // ── CLI-accessible e2e runners ────────────────────────────────────── +const SORT_TARGETS_PATH: &str = "jolt-eval/src/sort_targets.rs"; + /// Run the red-team e2e test against `CandidateSortInvariant`. pub fn run_redteam_test( model: &str, @@ -263,16 +233,17 @@ pub fn run_optimize_test( evaluate: |m| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), }; let hint = hint.unwrap_or_else(|| { - "The target is the `naive_sort` function in \ - jolt-eval/src/invariant/sort_e2e.rs. Replace it with a faster \ - sorting algorithm. You MAY modify jolt-eval/ for this task." - .into() + format!( + "The target is the `naive_sort` function in {SORT_TARGETS_PATH}. \ + Replace it with a faster sorting algorithm. \ + You MAY modify that file for this task." 
+ ) }); let config = OptimizeConfig { num_iterations: iterations, hint: Some(hint), verbose, - diff_scope: DiffScope::All, + diff_scope: DiffScope::Include(vec![SORT_TARGETS_PATH.into()]), }; println!("=== Optimize e2e: naive bubble sort ==="); diff --git a/jolt-eval/src/sort_targets.rs b/jolt-eval/src/sort_targets.rs new file mode 100644 index 000000000..153e7421a --- /dev/null +++ b/jolt-eval/src/sort_targets.rs @@ -0,0 +1,34 @@ +//! Sorting functions used as targets for e2e optimization and red-team tests. + +/// Naive bubble sort — the optimization target. +/// Intentionally O(n²) so a "smarter" sort is measurably faster. +pub fn naive_sort(data: &mut [i32]) { + let n = data.len(); + for i in 0..n { + for j in 0..n.saturating_sub(1 + i) { + if data[j] > data[j + 1] { + data.swap(j, j + 1); + } + } + } +} + +/// A sorting routine used as a red-team target. +pub fn candidate_sort(data: &mut [i32]) { + if data.len() <= 16 { + // Small-array path: insertion sort. + for i in 1..data.len() { + let key = data[i]; + let mut j = i; + while j > 0 && data[j - 1] > key { + data[j] = data[j - 1]; + j -= 1; + } + data[j] = key; + } + } else { + // Large-array path: delegate to an optimized routine. + let last = data.len() - 1; + data[..last].sort(); + } +} From 9f5b19186b0f9c4bdd2bbcaa9dc1719f9626541a Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 22:25:20 -0400 Subject: [PATCH 60/86] feat(jolt-eval): auto-optimize creates git branch and commits per iteration - Create branch jolt-eval/optimize/{name} at the start of the loop - git add + commit after each accepted iteration with score in message - git checkout . to revert after rejected iterations - Simplify RealEnv::accept/reject (git ops moved to auto_optimize) - Git commands fail silently in non-repo contexts (e.g. 
tests) Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/optimize.rs | 15 +------------ jolt-eval/src/objective/optimize.rs | 35 +++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 9d9ef1999..8f6c321b6 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -106,24 +106,11 @@ impl OptimizeEnv for RealEnv { } fn accept(&mut self, iteration: usize) { - println!(" Improvement found -- keeping changes."); - let _ = Command::new("git") - .current_dir(&self.repo_dir) - .args(["add", "-A"]) - .status(); - let msg = format!("perf(auto-optimize): iteration {iteration}"); - let _ = Command::new("git") - .current_dir(&self.repo_dir) - .args(["commit", "-m", &msg, "--allow-empty"]) - .status(); + println!(" Improvement found -- keeping changes (iteration {iteration})."); } fn reject(&mut self) { println!(" Reverting changes."); - let _ = Command::new("git") - .current_dir(&self.repo_dir) - .args(["checkout", "."]) - .status(); } } diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index 5513096be..e4e2cfb58 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -1,5 +1,6 @@ use std::collections::HashMap; use std::path::Path; +use std::process::Command; use crate::agent::{truncate, AgentHarness, DiffScope}; @@ -71,6 +72,14 @@ pub fn auto_optimize( config: &OptimizeConfig, repo_dir: &Path, ) -> OptimizeResult { + // Create a branch for this optimization run. Silently ignored if + // repo_dir is not a git repository (e.g. in tests). 
+ let branch = format!("jolt-eval/optimize/{}", objective.name); + let _ = Command::new("git") + .current_dir(repo_dir) + .args(["checkout", "-b", &branch]) + .status(); + let baseline = env.measure(); let baseline_score = (objective.evaluate)(&baseline); let mut best_score = baseline_score; @@ -146,8 +155,30 @@ pub fn auto_optimize( best_score = new_score; best_measurements = new_measurements; env.accept(iteration + 1); - } else if invariants_passed { - env.reject(); + let msg = format!( + "perf(auto-optimize): {} iteration {} (score {:.6})", + objective.name, + iteration + 1, + new_score, + ); + let _ = Command::new("git") + .current_dir(repo_dir) + .args(["add", "-A"]) + .status(); + let _ = Command::new("git") + .current_dir(repo_dir) + .args(["commit", "-m", &msg]) + .status(); + } else { + if !invariants_passed { + // Already rejected above. + } else { + env.reject(); + } + let _ = Command::new("git") + .current_dir(repo_dir) + .args(["checkout", "."]) + .status(); } } From a1578fff8a945f34c8126eb3c5b7e7b5fb8c1931 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 22:31:19 -0400 Subject: [PATCH 61/86] fix(jolt-eval): SortOptimizeEnv applies diff to actual file on disk Previously apply_diff only swapped the in-process function pointer, so git commit had nothing to track and subsequent worktrees always started from the original code. Now apply_diff also patches the file via agent::apply_diff, enabling real git history across iterations. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/sort_e2e.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/jolt-eval/src/sort_e2e.rs b/jolt-eval/src/sort_e2e.rs index 0b754b945..1888722bf 100644 --- a/jolt-eval/src/sort_e2e.rs +++ b/jolt-eval/src/sort_e2e.rs @@ -113,20 +113,25 @@ impl Invariant for NaiveSortInvariant { // ── SortOptimizeEnv ───────────────────────────────────────────────── /// An [`OptimizeEnv`] that measures wall-clock time of a sort function. -/// `apply_diff` simulates optimization by swapping to `slice::sort`. +/// +/// `apply_diff` both applies the diff to the actual file on disk (so +/// git can track and commit it) and swaps the in-process function +/// pointer (so `measure` reflects the improvement without recompiling). pub struct SortOptimizeEnv { pub(crate) sort_fn: fn(&mut [i32]), data: Vec, invariant_ok: bool, + repo_dir: std::path::PathBuf, } impl SortOptimizeEnv { - pub fn new(data_size: usize) -> Self { + pub fn new(data_size: usize, repo_dir: &std::path::Path) -> Self { let data: Vec = (0..data_size as i32).rev().collect(); Self { sort_fn: naive_sort, data, invariant_ok: true, + repo_dir: repo_dir.to_path_buf(), } } } @@ -149,7 +154,10 @@ impl OptimizeEnv for SortOptimizeEnv { self.invariant_ok } - fn apply_diff(&mut self, _diff: &str) { + fn apply_diff(&mut self, diff: &str) { + // Apply to the actual file so git can track and commit the change. + let _ = crate::agent::apply_diff(&self.repo_dir, diff); + // Simulate the optimization in-process (can't recompile at runtime). 
self.sort_fn = |d: &mut [i32]| d.sort(); } @@ -221,7 +229,7 @@ pub fn run_optimize_test( let agent = ClaudeCodeAgent::new(model, max_turns); let repo_dir = std::env::current_dir().expect("current dir"); - let mut env = SortOptimizeEnv::new(5000); + let mut env = SortOptimizeEnv::new(5000, &repo_dir); let baseline = env.measure(); let baseline_time = baseline[&NAIVE_SORT_TIME]; @@ -357,7 +365,7 @@ mod tests { diff: Some("--- a/sort.rs\n+++ b/sort.rs\n-bubble\n+merge".into()), })]); - let mut env = SortOptimizeEnv::new(5000); + let mut env = SortOptimizeEnv::new(5000, Path::new("/tmp")); let baseline = env.measure(); let baseline_time = baseline[&NAIVE_SORT_TIME]; @@ -393,7 +401,7 @@ mod tests { diff: Some("--- a/sort.rs\n+++ b/sort.rs\n-sort\n+noop".into()), })]); - let env = SortOptimizeEnv::new(100); + let env = SortOptimizeEnv::new(100, Path::new("/tmp")); struct BrokenSortEnv(SortOptimizeEnv); From acb20fd658c00c908e15155c7c279fa02147e4c2 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 23:05:24 -0400 Subject: [PATCH 62/86] feat(jolt-eval): clearer optimize iteration status and higher-precision scores MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Print ✓ ACCEPTED / ✗ REJECTED per iteration with old → new scores - Use .10 precision in both console output and git commit messages - Distinguish "invariants failed" from "no improvement" in reject messages Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/objective/optimize.rs | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index e4e2cfb58..cff3cfce1 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -136,10 +136,6 @@ pub fn auto_optimize( let new_score = (objective.evaluate)(&new_measurements); let invariants_passed = env.check_invariants(); - if !invariants_passed { - 
env.reject(); - } - let improved = invariants_passed && new_score < best_score; let attempt = OptimizationAttempt { @@ -151,15 +147,15 @@ pub fn auto_optimize( }; attempts.push(attempt); + let iter = iteration + 1; if improved { + eprintln!(" ✓ iteration {iter} ACCEPTED — score {best_score:.10} → {new_score:.10}",); best_score = new_score; best_measurements = new_measurements; - env.accept(iteration + 1); + env.accept(iter); let msg = format!( - "perf(auto-optimize): {} iteration {} (score {:.6})", + "perf(auto-optimize): {} iteration {iter} (score {new_score:.10})", objective.name, - iteration + 1, - new_score, ); let _ = Command::new("git") .current_dir(repo_dir) @@ -169,12 +165,18 @@ pub fn auto_optimize( .current_dir(repo_dir) .args(["commit", "-m", &msg]) .status(); + } else if !invariants_passed { + eprintln!(" ✗ iteration {iter} REJECTED (invariants failed) — score {new_score:.10}",); + env.reject(); + let _ = Command::new("git") + .current_dir(repo_dir) + .args(["checkout", "."]) + .status(); } else { - if !invariants_passed { - // Already rejected above. 
- } else { - env.reject(); - } + eprintln!( + " ✗ iteration {iter} REJECTED (no improvement) — score {new_score:.10} ≥ best {best_score:.10}", + ); + env.reject(); let _ = Command::new("git") .current_dir(repo_dir) .args(["checkout", "."]) From dfe3f968d2beb1a4d54fba02600d9983b27bcfb5 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 23:07:09 -0400 Subject: [PATCH 63/86] docs(jolt-eval): update README for unified Objective trait and ObjectiveFunction struct - Document ObjectiveFunction as const struct with inputs and evaluate - Add objective functions table and custom composite example - Update code examples to use unified Objective trait (not PerfObjective) - Add --list and --test CLI flags to redteam/optimize usage sections - Remove Direction column from objectives table - Document git branch creation in optimize workflow Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/README.md | 65 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/jolt-eval/README.md b/jolt-eval/README.md index b22a7480b..31f3b4069 100644 --- a/jolt-eval/README.md +++ b/jolt-eval/README.md @@ -13,15 +13,17 @@ The motivation is twofold: - A `libfuzzer_sys` fuzz target (via the `fuzz_invariant!` macro) - A "red team" harness for AI agents to try to find a violation -**Objectives** come in two flavors: +**Objectives** are measurable properties of the codebase. They come in two flavors: - **Code quality** (static analysis) — measured via `rust-code-analysis`: LLOC, cognitive complexity, Halstead bugs - **Performance** (benchmarks) — measured via Criterion: polynomial binding, end-to-end prover time +**Objective functions** combine one or more objectives into a single scalar that the optimizer minimizes. They are declared as `const` structs with a name, input objectives, and an evaluate function. 
+ ## Built-in invariants | Invariant | Targets | Description | |---|---|---| -| `split_eq_bind_low_high` | Test, Fuzz | `GruenSplitEqPolynomial::bind` (LowToHigh) matches `DensePolynomial::bound_poly_var_bot` | +| `split_eq_bind_low_high` | Test, Fuzz, RedTeam | `GruenSplitEqPolynomial::bind` (LowToHigh) matches `DensePolynomial::bound_poly_var_bot` | | `split_eq_bind_high_low` | Test, Fuzz | `GruenSplitEqPolynomial::bind` (HighToLow) matches `DensePolynomial::bound_poly_var_top` | | `soundness` | RedTeam | For any deterministic guest program + input, only one (output, panic) pair is accepted by the verifier | @@ -29,11 +31,11 @@ The motivation is twofold: ### Code quality (static analysis) -| Objective | Direction | Units | Description | -|---|---|---|---| -| `lloc` | Minimize | lines | Total logical lines of code in `jolt-core/src/` | -| `cognitive_complexity_avg` | Minimize | — | Average cognitive complexity per function | -| `halstead_bugs` | Minimize | — | Estimated delivered bugs (Halstead volume / 3000) | +| Objective | Units | Description | +|---|---|---| +| `lloc` | lines | Total logical lines of code in `jolt-core/src/` | +| `cognitive_complexity_avg` | — | Average cognitive complexity per function | +| `halstead_bugs` | — | Estimated delivered bugs (Halstead volume / 3000) | ### Performance (Criterion benchmarks) @@ -45,6 +47,31 @@ The motivation is twofold: | `prover_time_sha2_chain_100` | End-to-end prover time for 100 iterations of SHA-256 chain | | `prover_time_secp256k1_ecdsa_verify` | End-to-end prover time for secp256k1 ECDSA signature verification | +### Objective functions + +| Name | Inputs | Description | +|---|---|---| +| `minimize_lloc` | lloc | Minimize logical lines of code | +| `minimize_cognitive_complexity` | cognitive_complexity_avg | Minimize average cognitive complexity | +| `minimize_halstead_bugs` | halstead_bugs | Minimize estimated delivered bugs | +| `minimize_bind_low_to_high` | bind_parallel_low_to_high | Minimize LowToHigh 
binding time | +| `minimize_bind_high_to_low` | bind_parallel_high_to_low | Minimize HighToLow binding time | + +Custom composite objective functions can be defined as `ObjectiveFunction` structs: + +```rust +use jolt_eval::objective::objective_fn::ObjectiveFunction; +use jolt_eval::objective::{LLOC, HALSTEAD_BUGS}; + +const WEIGHTED_QUALITY: ObjectiveFunction = ObjectiveFunction { + name: "weighted_quality", + inputs: &[LLOC, HALSTEAD_BUGS], + evaluate: |m| { + 2.0 * m.get(&LLOC).unwrap_or(&0.0) + m.get(&HALSTEAD_BUGS).unwrap_or(&0.0) + }, +}; +``` + ## Usage ### Defining an invariant @@ -127,9 +154,16 @@ Criterion results are saved to `target/criterion/` (symlinked from `jolt-eval/be ### AI red-teaming ```bash +# List available invariants +cargo run --release -p jolt-eval --bin redteam -- --list + +# Red-team a specific invariant cargo run --release -p jolt-eval --bin redteam -- \ --invariant soundness --iterations 10 \ --hint "Look for edge cases in the memory layout" + +# Run the built-in e2e sort test +cargo run --release -p jolt-eval --bin redteam -- --test --verbose ``` The red-team harness runs the AI agent in an isolated git worktree. For the soundness invariant, the agent can edit `guest-sandbox/` directly — the harness captures the diff automatically via `git diff`. @@ -137,25 +171,32 @@ The red-team harness runs the AI agent in an isolated git worktree. 
For the soun ### AI-driven optimization ```bash +# List available objective functions +cargo run --release -p jolt-eval --bin optimize -- --list + +# Optimize a specific objective function cargo run --release -p jolt-eval --bin optimize -- \ - --objectives lloc,cognitive_complexity_avg --iterations 5 \ + --objective minimize_lloc --iterations 5 \ --hint "Focus on reducing complexity in jolt-core/src/subprotocols/" + +# Run the built-in e2e sort test +cargo run --release -p jolt-eval --bin optimize -- --test --verbose ``` -Each iteration: the agent works in an isolated worktree, the diff is applied, objectives are re-measured (including Criterion benchmarks with `--save-baseline`), invariants are checked, and the change is committed or reverted. +Each iteration: the agent works in an isolated worktree, the diff is applied, objectives are re-measured (including Criterion benchmarks with `--save-baseline`), invariants are checked, and the change is committed or reverted. The optimizer creates a git branch `jolt-eval/optimize/{name}` and commits each accepted iteration. 
### Defining a performance benchmark -Implement `PerfObjective` and create a bench file: +Implement the `Objective` trait and create a bench file: ```rust // src/objective/performance/my_bench.rs -use crate::objective::PerfObjective; +use crate::objective::Objective; #[derive(Default)] pub struct MyBenchObjective; -impl PerfObjective for MyBenchObjective { +impl Objective for MyBenchObjective { type Setup = MySetup; fn name(&self) -> &str { "my_bench" } fn setup(&self) -> MySetup { /* one-time setup */ } From 87df6382cf349d05f38ff204254e16d36ae370e7 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 23:12:51 -0400 Subject: [PATCH 64/86] =?UTF-8?q?fix(jolt-eval):=20typo=20unparseable=20?= =?UTF-8?q?=E2=86=92=20unparsable?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/invariant/synthesis/redteam.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index 713c7a2f3..e4793b8b2 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -117,7 +117,7 @@ pub fn auto_redteam( let input: I::Input = match serde_json::from_str(&counterexample_json) { Ok(v) => v, Err(e) => { - tracing::info!("Agent produced unparseable input: {e}"); + tracing::info!("Agent produced unparsable input: {e}"); failed_attempts.push(FailedAttempt { description: format!("Iteration {}", iteration + 1), approach: analysis, From 18ecdc23b185f68264adbb378dfd8903551d6b91 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 23:14:37 -0400 Subject: [PATCH 65/86] fix(jolt-eval): ignore libfuzzer-sys in cargo-machete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit False positive — libfuzzer-sys is used via the fuzz_invariant! macro which expands to libfuzzer_sys::fuzz_target!. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/fuzz/Cargo.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/jolt-eval/fuzz/Cargo.toml b/jolt-eval/fuzz/Cargo.toml index e4bdf4319..4eae4f2e4 100644 --- a/jolt-eval/fuzz/Cargo.toml +++ b/jolt-eval/fuzz/Cargo.toml @@ -16,6 +16,9 @@ allocative = { git = "https://github.com/facebookexperimental/allocative", rev = [package.metadata] cargo-fuzz = true +[package.metadata.cargo-machete] +ignored = ["libfuzzer-sys"] + [dependencies] libfuzzer-sys = "0.4" jolt-eval = { path = ".." } From ad59feab66a588b3ff13a9ec151eb1d8069a939b Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 23:14:52 -0400 Subject: [PATCH 66/86] delete spec --- invariant_spec.md | 250 ---------------------------------------------- 1 file changed, 250 deletions(-) delete mode 100644 invariant_spec.md diff --git a/invariant_spec.md b/invariant_spec.md deleted file mode 100644 index f9707d03a..000000000 --- a/invariant_spec.md +++ /dev/null @@ -1,250 +0,0 @@ -# Invariants and Objectives - -I want to introduce a Rust framework that gives some explicit structure to the "evaluation" part of the intent-execution-evaluation model described [here](https://gist.github.com/moodlezoup/e9f95839d9d848938eb54b662c6c5d25). The motivation is twofold: -1. Maximize agent productivity -2. Minimize the human verification surface - -"Evaluation" should be further broken down into **invariants** and **objectives**. - -**Invariants** are evaluations with a binary outcome, i.e. things that we want to always hold: -- All the tests pass -- No linter warnings/errors -- No unused dependencies - -**Objectives** are evaluations with a numerical outcome, i.e. things we may want to optimize for: -- Peak memory usage -- Runtime -- Code coverage -- Some subjective score of code quality, as judged by AI - -Note that by definition, invariants are a special case of objectives, but it's useful to think of them as separate categories. 
- -The key property for both invariants are objectives is that they must be **mechanically checkable**. This is important for both of our motivations: it increases agent productivity, by giving the agent a way to check its work without a human in the loop; and it allows the human to gain assurance about the larger codebase while only focusing on a smaller kernel of invariants/objectives. - -## Invariants - -Given a single invariant description (a small amount of Rust encoding the invariant), we should be able to mechanically synthesize it into: -- A test, -- A `libfuzzer_sys` fuzz target, -- And/or a "red team" harness for AI agents to try to find a violation of the invariant - - Assuming the invariant and harness are well-written, this should totally eliminate the possibility of false positives - - Should be flexible with respect to the agent setup (which model, how many agents, guiding prompt, etc.) - -In the long-term we should also be able to formally verify certain invariants. - -The invariant description should specify which of the above to generate. The "regular" tests generated from invariants should be run in CI. Fuzzing and AI-driven security reviews can be run at a less-frequent cadence or ad-hoc. 
- -Pseudocode for an Invariant trait: -```rust - trait Invariant: Send + Sync { - type Setup; - type Input: Arbitrary; - - fn name(&self) -> &str; - /// Used as context for an AI agent trying to violate this invariant - fn description(&self) -> String; - /// What to synthesize from this invariant - fn targets(&self) -> EnumSet // ⊆ {Test, Fuzz, RedTeam} - fn setup(&self) -> Self::Setup; - fn check(&self, setup: &Self::Setup, input: Self::Input) -> Result<(), InvariantViolation>; - /// Returns a seed corpus for tests/fuzzing (known-interesting inputs) - fn seed_corpus(&self) -> Vec { - vec![] - } -} - -``` - -Pseudocode for the AI "red team" harness: -```rust -fn auto_redteam(invariant: Invariant, prompt: String) { - for _ in NUM_ITERATIONS { - // Note: AI should run in an isolated worktree to produce the - // claimed bad input. The invariant is checked in the original - // working tree so the AI cannot cheat. - if let Some(bad_input) = todo!("Tell Claude to find violation of invariant") { - if let Err(e) = invariant.check(bad_input) { - todo!("Log counterexample and error"); - todo!("Tell Claude to summarize how it found the violation"); - break; - } - } else { - todo!("Clean up the worktree, cache description of failed attempt") - } - } -} - -struct InvariantCounterexample { - description: String, - input: I::Input, - error: InvariantViolation, -} - -struct FailedAttempt { - description: String, - approach: String, // What the agent tried - failure_reason: String, // Why it didn't produce a valid counterexample -} -``` - -## Objectives - -The top-level interface for working with objectives should look something like: -```rust -fn measure_objectives(objectives: Vec) -> HashMap; -``` -The function would iterate through the provided objectives, dispatch to their respective `collect_measurement` methods. 
- -Psuedocode for an Objective trait -```rust -trait AbstractObjective: Send + Sync { - fn name(&self) -> &str; - fn collect_measurement(&self) -> Result; - /// How many samples to take for statistical significance - fn recommended_samples(&self) -> usize { 1 } - /// What threshold is considered a regression, e.g., 5% slowdown - fn regression_threshold(&self) -> Option { None } - /// Is lower better or higher better? - fn direction(&self) -> Direction; // Minimize or Maximize -} -``` - -Objectives can be used as building blocks for expressive, AI-driven optimization tasks (cf. [autoresearch](https://github.com/karpathy/autoresearch)). -Pseudocode for a simple optimization harness: -```rust -fn auto_optimize) -> f64>(objectives: Vec, objective_function: F, prompt: String) { - let mut baseline = objective_function(measure_objectives(objectives)); - for _ in NUM_ITERATIONS { - todo!("Tell Claude Code to optimize for the given objective function"); - // Can also point Claude to specific functions/snippets to optimize - let new_score = objective_function(measure_objectives(objectives)); - let invariants_hold = check_invariants(); - if invariants_hold && new_score > baseline { - // Successful optimization - baseline = new_score; - todo!("Commit changes for async human review"); - } else { - todo!("Revert changes, cache description of the failed attempt"); - } - } -} - -pub enum Objective { - PeakRss(PeakRssObjective), - ProverTime(ProverTimeObjective), - ProofSize(ProofSizeObjective), - VerifierTime(VerifierTimeObjective), - GuestCycleCount(GuestCycleCountObjective), - // ... -} - -impl Objective { - pub fn collect_measurement(&self) -> Measurement { - match self { - Self::PeakRss(o) => o.collect_measurement(), - Self::ProverTime(o) => o.collect_measurement(), - // ... 
- } - } -} - -struct OptimizationAttempt { - description: String, // What the agent tried - diff: String, // The actual code change - measurements: HashMap, - invariants_passed: bool -} -``` - -Objectives are ideally reproducible, deterministic, and quick to obtain, though none of these are hard rules –– in particular, performance metrics like runtime inevitably have some variance and may be slow to obtain. -## Framing tasks in terms of invariants and objectives - -### Implementing a new feature -- Add new invariants to capture the behavior of the feature -- Modify existing invariants as necessary -- The spec for a new feature should clearly document new and modified invariants, as well as expected impact on objectives - - Impact on objectives can be mechanically validated -- Ensure that all invariants hold -### Bug fix -- Add a new invariant (or modify existing one) to fail without the fix -- Ensure that all other invariants still hold -- Document impact on objectives -### Security review -- Try to find a counterexample to some invariant -### Optimization -- For some function $f(o_1, o_2, \dots, o_n)$ that takes as input the objectives and outputs a single score, maximize the score -- Can apply techniques from multi-objective optimization literature -- Ensure that all invariants still hold -### Refactor -- Special case of optimization, where the objective function captures some notion of code quality - -## As applied to Jolt - -### Example invariants - -- **Soundness**: For a fixed program, input, and honest prover output/proof, the verifier does not accept for any other output/proof. -- **(Verifier) Completeness**: For a fixed program, input, and honest prover output/proof, the verifier accepts the honest output/proof. -- **(Prover) Completness**: For a fixed program, input, and valid size parameters for that program/input pair, the prover should produce a proof (or OOM/timeout). -- **Determinism**: Same program + input → same proof (byte-identical). 
-- **Serialization roundtrip**: `deserialize(serialize(proof)) == proof` -### Example objectives - -- Peak RSS (prover memory) -- Prover time -- Proof size -- Verifier time -- Guest cycle counts -- Virtual/inline sequence lengths -- Wrapping cost (Transpiled verifier constraint count) - -### Crate structure - -``` -jolt-eval/ - ├── Cargo.toml - ├── src/ - │ ├── lib.rs # Re-exports, top-level check/measure fns - │ │ - │ ├── invariant/ - │ │ ├── mod.rs # Invariant trait, InvariantViolation, SynthesisTarget, - │ │ │ # FailedAttempt, centralized Invariant enum - │ │ ├── soundness.rs # Soundness invariant (proof mutation) - │ │ ├── completeness_verifier.rs # Verifier completeness (honest proof accepted) - │ │ ├── completeness_prover.rs # Prover completeness (prover doesn't panic) - │ │ ├── determinism.rs # Same input → same proof - │ │ ├── serialization_roundtrip.rs # serialize(deserialize(x)) == x - │ │ ├── zk_consistency.rs # host and host,zk both produce valid proofs - │ │ └── synthesis/ - │ │ ├── mod.rs # Synthesis registry, shared types - │ │ ├── test.rs # #[test] generation from invariants - │ │ ├── fuzz.rs # libfuzzer_sys target generation - │ │ └── redteam.rs # auto_redteam loop, worktree orchestration, - │ │ # InvariantCounterexample, prompt construction - │ │ - │ └── objective/ - │ ├── mod.rs # AbstractObjective trait, Measurement, Unit, Direction, - │ │ # centralized Objective enum, measure_objectives() - │ ├── peak_rss.rs # Peak resident set size - │ ├── prover_time.rs # Wall-clock prover time - │ ├── proof_size.rs # Serialized proof byte length - │ ├── verifier_time.rs # Wall-clock verifier time - │ ├── guest_cycles.rs # Guest instruction cycle count - │ ├── inline_lengths.rs # Virtual/inline sequence lengths - │ ├── wrapping_cost.rs # Transpiled verifier constraint count - │ └── optimize.rs # auto_optimize loop, OptimizationAttempt, - │ # baseline tracking, commit/revert logic - │ - | - ├── macros/ - │ ├── Cargo.toml # jolt-eval-macros proc-macro crate - │ 
└── src/ - │ └── lib.rs # #[invariant(targets = [...])] attribute macro - │ - ├── bin/ - │ ├── check_invariants.rs # CLI: run all or selected invariants - │ ├── measure_objectives.rs # CLI: measure all or selected objectives, compare to baseline - │ └── redteam.rs # CLI: --invariant --iterations N --model - │ - └── tests/ - └── integration.rs # Smoke tests for the framework itself -``` From 121142e37485d97eb37f55cf413e38a02e86a836 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 23:27:19 -0400 Subject: [PATCH 67/86] feat(jolt-eval): derive diff_scope from objective inputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each OptimizationObjective now declares diff_paths() specifying which file paths an optimizer may modify. ObjectiveFunction::diff_scope() computes the union of all input objectives' paths as DiffScope::Include. - Static analysis + binding objectives: jolt-core/ - NaiveSortTime: jolt-eval/src/sort_targets.rs - Remove diff_scope from OptimizeConfig (now derived automatically) - Fix unwrap() → unwrap_or(f64::INFINITY) in const ObjectiveFunctions Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/optimize.rs | 1 - jolt-eval/src/objective/mod.rs | 21 +++++++++++++++++++++ jolt-eval/src/objective/objective_fn/mod.rs | 16 ++++++++++++++++ jolt-eval/src/objective/optimize.rs | 6 ++---- jolt-eval/src/sort_e2e.rs | 3 +-- 5 files changed, 40 insertions(+), 7 deletions(-) diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 8f6c321b6..e1834e380 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -175,7 +175,6 @@ fn main() -> eyre::Result<()> { num_iterations: cli.iterations, hint: cli.hint.clone(), verbose: cli.verbose, - ..Default::default() }; let result = auto_optimize(&agent, &mut env, objective, &config, &repo_dir); diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index da726ff88..3fa28194a 100644 --- 
a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -107,6 +107,13 @@ impl StaticAnalysisObjective { Self::HalsteadBugs(o) => o.units(), } } + + /// File paths that an optimizer should be allowed to modify. + pub fn diff_paths(&self) -> &'static [&'static str] { + match self { + Self::Lloc(_) | Self::CognitiveComplexity(_) | Self::HalsteadBugs(_) => &["jolt-core/"], + } + } } impl PartialEq for StaticAnalysisObjective { @@ -154,6 +161,13 @@ impl PerformanceObjective { Self::NaiveSortTime => Some("s"), } } + + pub fn diff_paths(&self) -> &'static [&'static str] { + match self { + Self::BindLowToHigh(_) | Self::BindHighToLow(_) => &["jolt-core/"], + Self::NaiveSortTime => &["jolt-eval/src/sort_targets.rs"], + } + } } impl PartialEq for PerformanceObjective { @@ -209,6 +223,13 @@ impl OptimizationObjective { } } + pub fn diff_paths(&self) -> &'static [&'static str] { + match self { + Self::StaticAnalysis(s) => s.diff_paths(), + Self::Performance(p) => p.diff_paths(), + } + } + pub fn is_perf(&self) -> bool { matches!(self, Self::Performance(_)) } diff --git a/jolt-eval/src/objective/objective_fn/mod.rs b/jolt-eval/src/objective/objective_fn/mod.rs index b2fb86e6a..d342d67e0 100644 --- a/jolt-eval/src/objective/objective_fn/mod.rs +++ b/jolt-eval/src/objective/objective_fn/mod.rs @@ -1,5 +1,7 @@ use std::collections::HashMap; +use crate::agent::DiffScope; + use super::{ OptimizationObjective, BIND_HIGH_TO_LOW, BIND_LOW_TO_HIGH, COGNITIVE_COMPLEXITY, HALSTEAD_BUGS, LLOC, @@ -36,6 +38,20 @@ impl ObjectiveFunction { pub fn by_name(name: &str) -> Option<&'static ObjectiveFunction> { Self::all().iter().find(|f| f.name == name) } + + /// Derive a [`DiffScope`] from the union of all input objectives' diff paths. 
+ pub fn diff_scope(&self) -> DiffScope { + let mut paths = Vec::new(); + for input in self.inputs { + for &p in input.diff_paths() { + let s = p.to_string(); + if !paths.contains(&s) { + paths.push(s); + } + } + } + DiffScope::Include(paths) + } } pub const MINIMIZE_LLOC: ObjectiveFunction = ObjectiveFunction { diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index cff3cfce1..b8776b50e 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::path::Path; use std::process::Command; -use crate::agent::{truncate, AgentHarness, DiffScope}; +use crate::agent::{truncate, AgentHarness}; use super::objective_fn::ObjectiveFunction; use super::OptimizationObjective; @@ -12,7 +12,6 @@ pub struct OptimizeConfig { pub num_iterations: usize, pub hint: Option, pub verbose: bool, - pub diff_scope: DiffScope, } impl Default for OptimizeConfig { @@ -21,7 +20,6 @@ impl Default for OptimizeConfig { num_iterations: 5, hint: None, verbose: false, - diff_scope: DiffScope::Exclude(vec!["jolt-eval/".into()]), } } } @@ -101,7 +99,7 @@ pub fn auto_optimize( eprintln!("────────────────────────"); } - let response = match agent.invoke(repo_dir, &prompt, &config.diff_scope) { + let response = match agent.invoke(repo_dir, &prompt, &objective.diff_scope()) { Ok(r) => r, Err(e) => { tracing::info!("Agent error: {e}"); diff --git a/jolt-eval/src/sort_e2e.rs b/jolt-eval/src/sort_e2e.rs index 1888722bf..e6aadd919 100644 --- a/jolt-eval/src/sort_e2e.rs +++ b/jolt-eval/src/sort_e2e.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; -use crate::agent::{ClaudeCodeAgent, DiffScope}; +use crate::agent::ClaudeCodeAgent; use crate::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; use crate::invariant::{CheckError, Invariant, InvariantViolation}; use crate::objective::objective_fn::ObjectiveFunction; @@ -251,7 +251,6 @@ pub fn run_optimize_test( 
num_iterations: iterations, hint: Some(hint), verbose, - diff_scope: DiffScope::Include(vec![SORT_TARGETS_PATH.into()]), }; println!("=== Optimize e2e: naive bubble sort ==="); From 925ad6d876f3b555d538e104719c5bfa2e88e9f7 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 23:38:54 -0400 Subject: [PATCH 68/86] feat(jolt-eval): improve optimize and redteam agent prompts - Add description() to StaticAnalysisObjective, PerformanceObjective, OptimizationObjective - Optimize prompt: add "What you are optimizing" section with descriptions and units - Optimize prompt: show units alongside measurement values - Optimize prompt: include first 500 chars of each past attempt's diff - Optimize prompt: add anti-repetition steering after previous attempts - Optimize prompt: derive targeted reading guidance from objective's diff_paths - Redteam prompt: always include JSON schema (not just when seed example exists) - Redteam prompt: truncate approach text to 200 chars in previous attempts - Redteam prompt: show "Iteration {current}/{total}" counter Co-Authored-By: Claude Sonnet 4.6 --- jolt-eval/src/invariant/synthesis/redteam.rs | 23 ++++++--- jolt-eval/src/objective/mod.rs | 27 ++++++++++ jolt-eval/src/objective/optimize.rs | 52 +++++++++++++++++--- 3 files changed, 90 insertions(+), 12 deletions(-) diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index e4793b8b2..2699e2ed6 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -1,7 +1,7 @@ use std::path::Path; use super::super::{CheckError, FailedAttempt, Invariant}; -use crate::agent::{AgentHarness, DiffScope}; +use crate::agent::{truncate, AgentHarness, DiffScope}; /// Result of a red-team session. 
pub enum RedTeamResult { @@ -61,8 +61,11 @@ pub fn auto_redteam( let prompt = build_redteam_prompt( &description, input_example.as_deref(), + &input_schema, config.hint.as_deref(), &failed_attempts, + iteration + 1, + config.num_iterations, ); if config.verbose { @@ -190,15 +193,19 @@ fn parse_envelope(text: &str) -> Option<(String, String)> { fn build_redteam_prompt( invariant_description: &str, input_example: Option<&str>, + input_schema: &serde_json::Value, hint: Option<&str>, failed_attempts: &[FailedAttempt], + current_iteration: usize, + total_iterations: usize, ) -> String { let mut prompt = String::new(); - prompt.push_str( + prompt.push_str(&format!( "You are a security researcher red-teaming a zkVM (Jolt). \ - Your goal is to find a concrete input that VIOLATES the following invariant.\n\n", - ); + Your goal is to find a concrete input that VIOLATES the following invariant.\n\ + Iteration {current_iteration}/{total_iterations}.\n\n" + )); prompt.push_str("## Invariant\n\n"); prompt.push_str(invariant_description); @@ -220,8 +227,8 @@ fn build_redteam_prompt( your best counterexample. A wrong guess is always better than no guess.\n\n", ); + prompt.push_str("## Input format\n\n"); if let Some(example) = input_example { - prompt.push_str("## Input format\n\n"); prompt.push_str( "The counterexample must be a JSON value matching the schema. 
\ Here is an example of a valid input:\n\n```json\n", @@ -229,6 +236,9 @@ fn build_redteam_prompt( prompt.push_str(example); prompt.push_str("\n```\n\n"); } + prompt.push_str("JSON schema for the counterexample:\n\n```json\n"); + prompt.push_str(&serde_json::to_string_pretty(input_schema).unwrap_or_default()); + prompt.push_str("\n```\n\n"); if let Some(hint) = hint { prompt.push_str("## Hint\n\n"); @@ -243,9 +253,10 @@ fn build_redteam_prompt( valid counterexample.\n\n", ); for attempt in failed_attempts { + let approach_preview = truncate(&attempt.approach, 200); prompt.push_str(&format!( "- **{}**: {}\n Failure: {}\n", - attempt.description, attempt.approach, attempt.failure_reason + attempt.description, approach_preview, attempt.failure_reason )); } prompt.push('\n'); diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 3fa28194a..e537c5a82 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -92,6 +92,18 @@ impl StaticAnalysisObjective { } } + pub fn description(&self) -> &str { + match self { + Self::Lloc(_) => "Total logical lines of code in jolt-core/src/", + Self::CognitiveComplexity(_) => { + "Average cognitive complexity per function in jolt-core/src/" + } + Self::HalsteadBugs(_) => { + "Estimated delivered bugs (Halstead volume / 3000) in jolt-core/src/" + } + } + } + pub fn collect_measurement(&self) -> Result { match self { Self::Lloc(o) => o.collect_measurement(), @@ -162,6 +174,14 @@ impl PerformanceObjective { } } + pub fn description(&self) -> &str { + match self { + Self::BindLowToHigh(_) => "Wall-clock time of DensePolynomial::bind_parallel with LowToHigh binding (2^20 evaluations)", + Self::BindHighToLow(_) => "Wall-clock time of DensePolynomial::bind_parallel with HighToLow binding (2^20 evaluations)", + Self::NaiveSortTime => "Wall-clock time of the naive_sort function in jolt-eval/src/sort_targets.rs", + } + } + pub fn diff_paths(&self) -> &'static [&'static str] { match self { 
Self::BindLowToHigh(_) | Self::BindHighToLow(_) => &["jolt-core/"], @@ -223,6 +243,13 @@ impl OptimizationObjective { } } + pub fn description(&self) -> &str { + match self { + Self::StaticAnalysis(s) => s.description(), + Self::Performance(p) => p.description(), + } + } + pub fn diff_paths(&self) -> &'static [&'static str] { match self { Self::StaticAnalysis(s) => s.diff_paths(), diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index b8776b50e..3d8b6d8b6 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -223,20 +223,48 @@ fn build_optimize_prompt( Read the implementation to understand exactly what you are optimizing.\n\n", ); + prompt.push_str("## What you are optimizing\n\n"); + for input in inputs { + let units_str = input + .units() + .map(|u| format!(" (units: {u})")) + .unwrap_or_default(); + prompt.push_str(&format!( + "- **{}**{units_str}: {}\n", + input.name(), + input.description() + )); + } + prompt.push('\n'); + prompt.push_str("## Current measurements\n\n"); let mut entries: Vec<_> = current_best_measurements.iter().collect(); entries.sort_by_key(|(k, _)| k.name()); for (key, val) in &entries { - prompt.push_str(&format!("- **{}**: {val:.6}\n", key.name())); + let units_str = key.units().map(|u| format!(" {u}")).unwrap_or_default(); + prompt.push_str(&format!("- **{}**: {val:.6}{units_str}\n", key.name())); } prompt.push('\n'); + // Derive targeted reading guidance from the union of all input diff_paths. + let mut diff_paths: Vec<&str> = Vec::new(); + for input in inputs { + for &p in input.diff_paths() { + if !diff_paths.contains(&p) { + diff_paths.push(p); + } + } + } + let paths_list = diff_paths.join(", "); + prompt.push_str("## Instructions\n\n"); + prompt.push_str(&format!( + "1. Read the relevant source code in: {paths_list}. 
Also read \ + `jolt-eval/src/objective/objective_fn/` to understand the exact scoring formula.\n" + )); prompt.push_str( - "## Instructions\n\n\ - 1. Read the relevant source code (especially `jolt-core/src/`) to understand \ - hot paths and potential optimization opportunities.\n\ - 2. Make targeted code changes that you believe will reduce the objective function.\n\ - 3. Focus on changes to `jolt-core/` -- do NOT modify `jolt-eval/`.\n\ + "2. Make targeted code changes that you believe will reduce the objective function.\n\ + 3. Focus your changes on the paths listed above -- do NOT modify `jolt-eval/` unless \ + it is explicitly listed.\n\ 4. Prefer changes that are safe, correct, and unlikely to break invariants.\n\ 5. Run `cargo clippy -p jolt-core --features host --message-format=short -q` \ to verify your changes compile.\n\ @@ -267,8 +295,20 @@ fn build_optimize_prompt( prompt.push_str(&format!("{}={val:.6} ", key.name())); } prompt.push('\n'); + if !attempt.diff.is_empty() { + prompt.push_str(&format!( + " Diff preview: `{}`\n", + truncate(&attempt.diff, 500) + )); + } } prompt.push('\n'); + + prompt.push_str( + "If previous attempts failed or showed no improvement, try a fundamentally \ + different approach. Analyze WHY the previous approach did not reduce the score \ + and pivot to a new strategy.\n\n", + ); } prompt.push_str( From eac6ddb10a809d302b4e3879d7297cc574735b2d Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 23:43:33 -0400 Subject: [PATCH 69/86] refactor(jolt-eval): move objective descriptions to Objective trait Add description() with default (returns name()) to the Objective trait. Each concrete type now owns its description, and the enums delegate instead of hardcoding strings. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/objective/code_quality/cognitive.rs | 4 ++++ .../objective/code_quality/halstead_bugs.rs | 4 ++++ jolt-eval/src/objective/code_quality/lloc.rs | 4 ++++ jolt-eval/src/objective/mod.rs | 22 ++++++++++--------- .../src/objective/performance/binding.rs | 8 +++++++ 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/jolt-eval/src/objective/code_quality/cognitive.rs b/jolt-eval/src/objective/code_quality/cognitive.rs index 4207ec38a..7b91ee1f2 100644 --- a/jolt-eval/src/objective/code_quality/cognitive.rs +++ b/jolt-eval/src/objective/code_quality/cognitive.rs @@ -33,6 +33,10 @@ impl Objective for CognitiveComplexityObjective { "cognitive_complexity_avg" } + fn description(&self) -> &str { + "Average cognitive complexity per function in jolt-core/src/" + } + fn setup(&self) {} fn collect_measurement(&self) -> Result { diff --git a/jolt-eval/src/objective/code_quality/halstead_bugs.rs b/jolt-eval/src/objective/code_quality/halstead_bugs.rs index f6a29aafc..408efb84b 100644 --- a/jolt-eval/src/objective/code_quality/halstead_bugs.rs +++ b/jolt-eval/src/objective/code_quality/halstead_bugs.rs @@ -34,6 +34,10 @@ impl Objective for HalsteadBugsObjective { "halstead_bugs" } + fn description(&self) -> &str { + "Estimated delivered bugs (Halstead volume / 3000) in jolt-core/src/" + } + fn setup(&self) {} fn collect_measurement(&self) -> Result { diff --git a/jolt-eval/src/objective/code_quality/lloc.rs b/jolt-eval/src/objective/code_quality/lloc.rs index 680733679..5545c2bd2 100644 --- a/jolt-eval/src/objective/code_quality/lloc.rs +++ b/jolt-eval/src/objective/code_quality/lloc.rs @@ -33,6 +33,10 @@ impl Objective for LlocObjective { "lloc" } + fn description(&self) -> &str { + "Total logical lines of code in jolt-core/src/" + } + fn setup(&self) {} fn collect_measurement(&self) -> Result { diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index e537c5a82..dfcc4ff99 100644 --- 
a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -43,6 +43,10 @@ pub trait Objective: Send + Sync { fn name(&self) -> &str; + fn description(&self) -> &str { + self.name() + } + fn units(&self) -> Option<&str> { None } @@ -94,13 +98,9 @@ impl StaticAnalysisObjective { pub fn description(&self) -> &str { match self { - Self::Lloc(_) => "Total logical lines of code in jolt-core/src/", - Self::CognitiveComplexity(_) => { - "Average cognitive complexity per function in jolt-core/src/" - } - Self::HalsteadBugs(_) => { - "Estimated delivered bugs (Halstead volume / 3000) in jolt-core/src/" - } + Self::Lloc(o) => o.description(), + Self::CognitiveComplexity(o) => o.description(), + Self::HalsteadBugs(o) => o.description(), } } @@ -176,9 +176,11 @@ impl PerformanceObjective { pub fn description(&self) -> &str { match self { - Self::BindLowToHigh(_) => "Wall-clock time of DensePolynomial::bind_parallel with LowToHigh binding (2^20 evaluations)", - Self::BindHighToLow(_) => "Wall-clock time of DensePolynomial::bind_parallel with HighToLow binding (2^20 evaluations)", - Self::NaiveSortTime => "Wall-clock time of the naive_sort function in jolt-eval/src/sort_targets.rs", + Self::BindLowToHigh(o) => o.description(), + Self::BindHighToLow(o) => o.description(), + Self::NaiveSortTime => { + "Wall-clock time of the naive_sort function in jolt-eval/src/sort_targets.rs" + } } } diff --git a/jolt-eval/src/objective/performance/binding.rs b/jolt-eval/src/objective/performance/binding.rs index 37bb3babd..bcfcdaaba 100644 --- a/jolt-eval/src/objective/performance/binding.rs +++ b/jolt-eval/src/objective/performance/binding.rs @@ -56,6 +56,10 @@ impl Objective for BindLowToHighObjective { Self::NAME } + fn description(&self) -> &str { + "Wall-clock time of DensePolynomial::bind_parallel with LowToHigh binding (2^20 evaluations)" + } + fn setup(&self) -> BindSetup { thread_local! 
{ static SHARED: BindShared = BindShared::new(); @@ -90,6 +94,10 @@ impl Objective for BindHighToLowObjective { Self::NAME } + fn description(&self) -> &str { + "Wall-clock time of DensePolynomial::bind_parallel with HighToLow binding (2^20 evaluations)" + } + fn setup(&self) -> BindSetup { thread_local! { static SHARED: BindShared = BindShared::new(); From 74c4f3b9d08ad1799c03e9d3e5f01afe9ad6c0bc Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Fri, 3 Apr 2026 23:49:18 -0400 Subject: [PATCH 70/86] refactor(jolt-eval): add JoltInvariants::red_team() via dispatch macro Replace the manual match statement in the redteam binary with a red_team() method on JoltInvariants that uses the dispatch! macro, consistent with name(), description(), targets(), and run_checks(). Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/redteam.rs | 8 ++------ jolt-eval/src/invariant/mod.rs | 11 +++++++++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/jolt-eval/bin/redteam.rs b/jolt-eval/bin/redteam.rs index 2b8979531..c2111f5e9 100644 --- a/jolt-eval/bin/redteam.rs +++ b/jolt-eval/bin/redteam.rs @@ -2,7 +2,7 @@ use clap::Parser; use tracing::info; use jolt_eval::agent::ClaudeCodeAgent; -use jolt_eval::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; +use jolt_eval::invariant::synthesis::redteam::{RedTeamConfig, RedTeamResult}; use jolt_eval::invariant::{JoltInvariants, SynthesisTarget}; use jolt_eval::sort_e2e; @@ -100,11 +100,7 @@ fn main() -> eyre::Result<()> { cli.iterations, cli.model ); - let result = match invariant { - JoltInvariants::SplitEqBindLowHigh(inv) => auto_redteam(inv, &config, &agent, &repo_dir), - JoltInvariants::SplitEqBindHighLow(inv) => auto_redteam(inv, &config, &agent, &repo_dir), - JoltInvariants::Soundness(inv) => auto_redteam(inv, &config, &agent, &repo_dir), - }; + let result = invariant.red_team(&config, &agent, &repo_dir); match result { RedTeamResult::Violation { diff --git 
a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index 1b9739f43..3d8674f4b 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -170,6 +170,17 @@ impl JoltInvariants { pub fn run_checks(&self, num_random: usize) -> Vec> { dispatch!(self, |inv| run_checks_impl(inv, num_random)) } + + pub fn red_team( + &self, + config: &synthesis::redteam::RedTeamConfig, + agent: &dyn crate::agent::AgentHarness, + repo_dir: &std::path::Path, + ) -> synthesis::redteam::RedTeamResult { + dispatch!(self, |inv| synthesis::redteam::auto_redteam( + inv, config, agent, repo_dir + )) + } } fn run_checks_impl( From 674e85a284c81ad6e4c44d7fb6e1a055c536981f Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 10:50:01 -0400 Subject: [PATCH 71/86] feat(jolt-eval): add baseline() and normalized() for objective score normalization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Objectives have different units and magnitudes (e.g. LLOC ~5500 lines vs cognitive complexity ~4.0), making weighted combinations unwieldy. 
- Add baseline() to StaticAnalysisObjective, PerformanceObjective, and OptimizationObjective returning a reference scale - Add normalized(obj, measurements) free function: value / baseline, yielding a dimensionless ratio where 1.0 ≈ current typical value - Add test demonstrating a balanced 50/50 weighted composite that treats a 10% improvement in any objective equally Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/lib.rs | 2 +- jolt-eval/src/objective/mod.rs | 46 +++++++++++++++++++++ jolt-eval/src/objective/objective_fn/mod.rs | 28 +++++++++++++ 3 files changed, 75 insertions(+), 1 deletion(-) diff --git a/jolt-eval/src/lib.rs b/jolt-eval/src/lib.rs index 5b22ff82d..90deab02e 100644 --- a/jolt-eval/src/lib.rs +++ b/jolt-eval/src/lib.rs @@ -22,7 +22,7 @@ pub use invariant::{ }; pub use objective::objective_fn::ObjectiveFunction; pub use objective::{ - MeasurementError, Objective, OptimizationObjective, PerformanceObjective, + normalized, MeasurementError, Objective, OptimizationObjective, PerformanceObjective, StaticAnalysisObjective, }; diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index dfcc4ff99..80e86b8b1 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -120,6 +120,17 @@ impl StaticAnalysisObjective { } } + /// Reference scale for normalization. `value / baseline()` yields a + /// dimensionless ratio where 1.0 ≈ "typical current value". + /// Update these when the codebase changes significantly. + pub fn baseline(&self) -> f64 { + match self { + Self::Lloc(_) => 5500.0, + Self::CognitiveComplexity(_) => 4.0, + Self::HalsteadBugs(_) => 80.0, + } + } + /// File paths that an optimizer should be allowed to modify. 
pub fn diff_paths(&self) -> &'static [&'static str] { match self { @@ -184,6 +195,14 @@ impl PerformanceObjective { } } + pub fn baseline(&self) -> f64 { + match self { + Self::BindLowToHigh(_) => 0.04, + Self::BindHighToLow(_) => 0.04, + Self::NaiveSortTime => 0.01, + } + } + pub fn diff_paths(&self) -> &'static [&'static str] { match self { Self::BindLowToHigh(_) | Self::BindHighToLow(_) => &["jolt-core/"], @@ -252,6 +271,14 @@ impl OptimizationObjective { } } + /// Reference scale for normalization. See [`normalized`]. + pub fn baseline(&self) -> f64 { + match self { + Self::StaticAnalysis(s) => s.baseline(), + Self::Performance(p) => p.baseline(), + } + } + pub fn diff_paths(&self) -> &'static [&'static str] { match self { Self::StaticAnalysis(s) => s.diff_paths(), @@ -264,6 +291,25 @@ impl OptimizationObjective { } } +/// Look up an objective's measurement and divide by its [`baseline`](OptimizationObjective::baseline), +/// yielding a dimensionless ratio where 1.0 ≈ "typical current value". 
+/// +/// Use this in composite [`ObjectiveFunction`](objective_fn::ObjectiveFunction) +/// evaluate closures so that objectives with different units contribute +/// on a comparable scale: +/// +/// ```ignore +/// use jolt_eval::objective::{normalized, LLOC, HALSTEAD_BUGS}; +/// +/// let evaluate = |m| 0.5 * normalized(&LLOC, m) + 0.5 * normalized(&HALSTEAD_BUGS, m); +/// ``` +pub fn normalized( + obj: &OptimizationObjective, + measurements: &std::collections::HashMap, +) -> f64 { + measurements.get(obj).copied().unwrap_or(f64::INFINITY) / obj.baseline() +} + impl PartialEq for OptimizationObjective { fn eq(&self, other: &Self) -> bool { match (self, other) { diff --git a/jolt-eval/src/objective/objective_fn/mod.rs b/jolt-eval/src/objective/objective_fn/mod.rs index d342d67e0..2e29fdab0 100644 --- a/jolt-eval/src/objective/objective_fn/mod.rs +++ b/jolt-eval/src/objective/objective_fn/mod.rs @@ -137,4 +137,32 @@ mod tests { m.insert(HALSTEAD_BUGS, 100.0); assert_eq!((weighted.evaluate)(&m), 120.0); } + + #[test] + fn normalized_composite_objective() { + use crate::objective::normalized; + + // LLOC baseline is 5500, Halstead baseline is 80. + // Without normalization, LLOC dominates due to magnitude. + // With normalization, both contribute on a comparable scale. 
+ const INPUTS: &[OptimizationObjective] = &[LLOC, HALSTEAD_BUGS]; + let balanced = ObjectiveFunction { + name: "balanced_quality", + inputs: INPUTS, + evaluate: |m| 0.5 * normalized(&LLOC, m) + 0.5 * normalized(&HALSTEAD_BUGS, m), + }; + + let mut m = HashMap::new(); + m.insert(LLOC, 5500.0); // exactly at baseline → normalized = 1.0 + m.insert(HALSTEAD_BUGS, 80.0); // exactly at baseline → normalized = 1.0 + let score = (balanced.evaluate)(&m); + assert!((score - 1.0).abs() < 1e-9, "expected 1.0, got {score}"); + + // 10% improvement in LLOC + m.insert(LLOC, 4950.0); + let score2 = (balanced.evaluate)(&m); + assert!(score2 < score, "10% LLOC improvement should reduce score"); + // 0.5 * (4950/5500) + 0.5 * (80/80) = 0.5 * 0.9 + 0.5 = 0.95 + assert!((score2 - 0.95).abs() < 1e-9, "expected 0.95, got {score2}"); + } } From 43fe4b1300b5719e4bde2c7810da1a627ed75953 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 10:55:28 -0400 Subject: [PATCH 72/86] refactor(jolt-eval): use initial measurements as normalization baselines Replace hardcoded baseline() values with dynamic baselines captured from the first env.measure() call at the start of auto_optimize. 
- ObjectiveFunction.evaluate now takes (measurements, baselines) - normalized(obj, measurements, baselines) divides by the baseline measurement instead of a hardcoded constant - Remove baseline() from all objective enums - auto_optimize passes initial measurements as baselines to evaluate - Single-objective functions ignore baselines (use |m, _|) Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/optimize.rs | 2 +- jolt-eval/src/agent/tests.rs | 9 ++-- jolt-eval/src/objective/mod.rs | 44 ++++------------- jolt-eval/src/objective/objective_fn/mod.rs | 52 +++++++++++++-------- jolt-eval/src/objective/optimize.rs | 4 +- jolt-eval/src/sort_e2e.rs | 6 +-- 6 files changed, 53 insertions(+), 64 deletions(-) diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index e1834e380..41773ff6b 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -166,7 +166,7 @@ fn main() -> eyre::Result<()> { println!("=== Baseline ==="); let baseline = env.measure(); - let baseline_score = (objective.evaluate)(&baseline); + let baseline_score = (objective.evaluate)(&baseline, &baseline); print_measurements(&baseline); println!("Objective: {} = {:.6}\n", objective.name, baseline_score); diff --git a/jolt-eval/src/agent/tests.rs b/jolt-eval/src/agent/tests.rs index dcf65ec5c..bb31167cd 100644 --- a/jolt-eval/src/agent/tests.rs +++ b/jolt-eval/src/agent/tests.rs @@ -724,7 +724,7 @@ fn lloc_obj() -> ObjectiveFunction { ObjectiveFunction { name: "test_lloc", inputs: INPUTS, - evaluate: |m| m.get(&LLOC).copied().unwrap_or(f64::INFINITY), + evaluate: |m, _| m.get(&LLOC).copied().unwrap_or(f64::INFINITY), } } @@ -807,7 +807,7 @@ fn optimize_custom_objective_function() { let weighted = ObjectiveFunction { name: "weighted", inputs: INPUTS, - evaluate: |m| 2.0 * m.get(&LLOC).unwrap_or(&0.0) + m.get(&HALSTEAD_BUGS).unwrap_or(&0.0), + evaluate: |m, _| 2.0 * m.get(&LLOC).unwrap_or(&0.0) + m.get(&HALSTEAD_BUGS).unwrap_or(&0.0), }; let agent = 
MockAgent::from_responses(vec![Ok(AgentResponse { @@ -1036,8 +1036,9 @@ fn objective_function_struct_evaluates() { let obj = lloc_obj(); let mut m = HashMap::new(); m.insert(lloc(), 3.5); - assert_eq!((obj.evaluate)(&m), 3.5); + let b = HashMap::new(); + assert_eq!((obj.evaluate)(&m, &b), 3.5); let empty = HashMap::new(); - assert_eq!((obj.evaluate)(&empty), f64::INFINITY); + assert_eq!((obj.evaluate)(&empty, &b), f64::INFINITY); } diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 80e86b8b1..f40cfad92 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -120,17 +120,6 @@ impl StaticAnalysisObjective { } } - /// Reference scale for normalization. `value / baseline()` yields a - /// dimensionless ratio where 1.0 ≈ "typical current value". - /// Update these when the codebase changes significantly. - pub fn baseline(&self) -> f64 { - match self { - Self::Lloc(_) => 5500.0, - Self::CognitiveComplexity(_) => 4.0, - Self::HalsteadBugs(_) => 80.0, - } - } - /// File paths that an optimizer should be allowed to modify. pub fn diff_paths(&self) -> &'static [&'static str] { match self { @@ -195,14 +184,6 @@ impl PerformanceObjective { } } - pub fn baseline(&self) -> f64 { - match self { - Self::BindLowToHigh(_) => 0.04, - Self::BindHighToLow(_) => 0.04, - Self::NaiveSortTime => 0.01, - } - } - pub fn diff_paths(&self) -> &'static [&'static str] { match self { Self::BindLowToHigh(_) | Self::BindHighToLow(_) => &["jolt-core/"], @@ -271,14 +252,6 @@ impl OptimizationObjective { } } - /// Reference scale for normalization. See [`normalized`]. 
- pub fn baseline(&self) -> f64 { - match self { - Self::StaticAnalysis(s) => s.baseline(), - Self::Performance(p) => p.baseline(), - } - } - pub fn diff_paths(&self) -> &'static [&'static str] { match self { Self::StaticAnalysis(s) => s.diff_paths(), @@ -291,23 +264,26 @@ impl OptimizationObjective { } } -/// Look up an objective's measurement and divide by its [`baseline`](OptimizationObjective::baseline), -/// yielding a dimensionless ratio where 1.0 ≈ "typical current value". +/// Look up an objective's measurement and divide by its baseline value, +/// yielding a dimensionless ratio where 1.0 = the baseline. /// -/// Use this in composite [`ObjectiveFunction`](objective_fn::ObjectiveFunction) -/// evaluate closures so that objectives with different units contribute -/// on a comparable scale: +/// `baselines` is typically the initial measurements captured at the +/// start of an optimization run (passed as the second argument to +/// [`ObjectiveFunction::evaluate`](objective_fn::ObjectiveFunction)). 
/// /// ```ignore /// use jolt_eval::objective::{normalized, LLOC, HALSTEAD_BUGS}; /// -/// let evaluate = |m| 0.5 * normalized(&LLOC, m) + 0.5 * normalized(&HALSTEAD_BUGS, m); +/// let evaluate = |m, b| 0.5 * normalized(&LLOC, m, b) + 0.5 * normalized(&HALSTEAD_BUGS, m, b); /// ``` pub fn normalized( obj: &OptimizationObjective, measurements: &std::collections::HashMap, + baselines: &std::collections::HashMap, ) -> f64 { - measurements.get(obj).copied().unwrap_or(f64::INFINITY) / obj.baseline() + let value = measurements.get(obj).copied().unwrap_or(f64::INFINITY); + let baseline = baselines.get(obj).copied().unwrap_or(1.0); + value / baseline } impl PartialEq for OptimizationObjective { diff --git a/jolt-eval/src/objective/objective_fn/mod.rs b/jolt-eval/src/objective/objective_fn/mod.rs index 2e29fdab0..6f6373ef7 100644 --- a/jolt-eval/src/objective/objective_fn/mod.rs +++ b/jolt-eval/src/objective/objective_fn/mod.rs @@ -18,8 +18,11 @@ pub struct ObjectiveFunction { /// The [`OptimizationObjective`]s this function reads. pub inputs: &'static [OptimizationObjective], /// Combine measurements into a scalar to minimize. - /// The HashMap is guaranteed to contain all keys from [`inputs`]. - pub evaluate: fn(&HashMap) -> f64, + /// The first HashMap contains the current measurements; the second + /// contains the baseline measurements (captured at the start of the + /// optimization run) for use with [`normalized()`](super::normalized). 
+ pub evaluate: + fn(&HashMap, &HashMap) -> f64, } impl ObjectiveFunction { @@ -57,13 +60,13 @@ impl ObjectiveFunction { pub const MINIMIZE_LLOC: ObjectiveFunction = ObjectiveFunction { name: "minimize_lloc", inputs: &[LLOC], - evaluate: |m| m.get(&LLOC).copied().unwrap_or(f64::INFINITY), + evaluate: |m, _| m.get(&LLOC).copied().unwrap_or(f64::INFINITY), }; pub const MINIMIZE_COGNITIVE_COMPLEXITY: ObjectiveFunction = ObjectiveFunction { name: "minimize_cognitive_complexity", inputs: &[COGNITIVE_COMPLEXITY], - evaluate: |m| { + evaluate: |m, _| { m.get(&COGNITIVE_COMPLEXITY) .copied() .unwrap_or(f64::INFINITY) @@ -73,36 +76,43 @@ pub const MINIMIZE_COGNITIVE_COMPLEXITY: ObjectiveFunction = ObjectiveFunction { pub const MINIMIZE_HALSTEAD_BUGS: ObjectiveFunction = ObjectiveFunction { name: "minimize_halstead_bugs", inputs: &[HALSTEAD_BUGS], - evaluate: |m| m.get(&HALSTEAD_BUGS).copied().unwrap_or(f64::INFINITY), + evaluate: |m, _| m.get(&HALSTEAD_BUGS).copied().unwrap_or(f64::INFINITY), }; pub const MINIMIZE_BIND_LOW_TO_HIGH: ObjectiveFunction = ObjectiveFunction { name: "minimize_bind_low_to_high", inputs: &[BIND_LOW_TO_HIGH], - evaluate: |m| m.get(&BIND_LOW_TO_HIGH).copied().unwrap_or(f64::INFINITY), + evaluate: |m, _| m.get(&BIND_LOW_TO_HIGH).copied().unwrap_or(f64::INFINITY), }; pub const MINIMIZE_BIND_HIGH_TO_LOW: ObjectiveFunction = ObjectiveFunction { name: "minimize_bind_high_to_low", inputs: &[BIND_HIGH_TO_LOW], - evaluate: |m| m.get(&BIND_HIGH_TO_LOW).copied().unwrap_or(f64::INFINITY), + evaluate: |m, _| m.get(&BIND_HIGH_TO_LOW).copied().unwrap_or(f64::INFINITY), }; #[cfg(test)] mod tests { use super::*; + fn empty_baselines() -> HashMap { + HashMap::new() + } + #[test] fn minimize_lloc_evaluates() { let mut m = HashMap::new(); m.insert(LLOC, 5000.0); - assert_eq!((MINIMIZE_LLOC.evaluate)(&m), 5000.0); + assert_eq!((MINIMIZE_LLOC.evaluate)(&m, &empty_baselines()), 5000.0); } #[test] fn missing_input_returns_infinity() { let m = HashMap::new(); - 
assert_eq!((MINIMIZE_LLOC.evaluate)(&m), f64::INFINITY); + assert_eq!( + (MINIMIZE_LLOC.evaluate)(&m, &empty_baselines()), + f64::INFINITY + ); } #[test] @@ -127,7 +137,7 @@ mod tests { let weighted = ObjectiveFunction { name: "weighted", inputs: INPUTS, - evaluate: |m| { + evaluate: |m, _| { 2.0 * m.get(&LLOC).unwrap_or(&0.0) + m.get(&HALSTEAD_BUGS).unwrap_or(&0.0) }, }; @@ -135,32 +145,34 @@ mod tests { let mut m = HashMap::new(); m.insert(LLOC, 10.0); m.insert(HALSTEAD_BUGS, 100.0); - assert_eq!((weighted.evaluate)(&m), 120.0); + assert_eq!((weighted.evaluate)(&m, &empty_baselines()), 120.0); } #[test] fn normalized_composite_objective() { use crate::objective::normalized; - // LLOC baseline is 5500, Halstead baseline is 80. - // Without normalization, LLOC dominates due to magnitude. - // With normalization, both contribute on a comparable scale. + // Baselines are the initial measurements. Normalization divides + // each value by its baseline, yielding a dimensionless ratio. const INPUTS: &[OptimizationObjective] = &[LLOC, HALSTEAD_BUGS]; let balanced = ObjectiveFunction { name: "balanced_quality", inputs: INPUTS, - evaluate: |m| 0.5 * normalized(&LLOC, m) + 0.5 * normalized(&HALSTEAD_BUGS, m), + evaluate: |m, b| 0.5 * normalized(&LLOC, m, b) + 0.5 * normalized(&HALSTEAD_BUGS, m, b), }; - let mut m = HashMap::new(); - m.insert(LLOC, 5500.0); // exactly at baseline → normalized = 1.0 - m.insert(HALSTEAD_BUGS, 80.0); // exactly at baseline → normalized = 1.0 - let score = (balanced.evaluate)(&m); + let mut baselines = HashMap::new(); + baselines.insert(LLOC, 5500.0); + baselines.insert(HALSTEAD_BUGS, 80.0); + + // At baseline values → normalized = 1.0 for each → score = 1.0 + let score = (balanced.evaluate)(&baselines, &baselines); assert!((score - 1.0).abs() < 1e-9, "expected 1.0, got {score}"); // 10% improvement in LLOC + let mut m = baselines.clone(); m.insert(LLOC, 4950.0); - let score2 = (balanced.evaluate)(&m); + let score2 = (balanced.evaluate)(&m, 
&baselines); assert!(score2 < score, "10% LLOC improvement should reduce score"); // 0.5 * (4950/5500) + 0.5 * (80/80) = 0.5 * 0.9 + 0.5 = 0.95 assert!((score2 - 0.95).abs() < 1e-9, "expected 0.95, got {score2}"); diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index 3d8b6d8b6..0af9be82e 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -79,7 +79,7 @@ pub fn auto_optimize( .status(); let baseline = env.measure(); - let baseline_score = (objective.evaluate)(&baseline); + let baseline_score = (objective.evaluate)(&baseline, &baseline); let mut best_score = baseline_score; let mut best_measurements = baseline.clone(); let mut attempts = Vec::new(); @@ -131,7 +131,7 @@ pub fn auto_optimize( }; let new_measurements = env.measure(); - let new_score = (objective.evaluate)(&new_measurements); + let new_score = (objective.evaluate)(&new_measurements, &baseline); let invariants_passed = env.check_invariants(); let improved = invariants_passed && new_score < best_score; diff --git a/jolt-eval/src/sort_e2e.rs b/jolt-eval/src/sort_e2e.rs index e6aadd919..4291c77aa 100644 --- a/jolt-eval/src/sort_e2e.rs +++ b/jolt-eval/src/sort_e2e.rs @@ -238,7 +238,7 @@ pub fn run_optimize_test( let obj = ObjectiveFunction { name: "naive_sort_time", inputs: &[NAIVE_SORT_TIME], - evaluate: |m| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), + evaluate: |m, _| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), }; let hint = hint.unwrap_or_else(|| { format!( @@ -375,7 +375,7 @@ mod tests { let obj = ObjectiveFunction { name: "naive_sort_time", inputs: &[NAIVE_SORT_TIME], - evaluate: |m| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), + evaluate: |m, _| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), }; let config = OptimizeConfig { num_iterations: 1, @@ -431,7 +431,7 @@ mod tests { let obj = ObjectiveFunction { name: "naive_sort_time", inputs: &[NAIVE_SORT_TIME], - evaluate: 
|m| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), + evaluate: |m, _| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), }; let config = OptimizeConfig { num_iterations: 1, From a65c10ea16dec095c51118584c0d44c04fa466b1 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 13:35:17 -0400 Subject: [PATCH 73/86] =?UTF-8?q?cleanup(jolt-eval):=20remove=20AI=20slop?= =?UTF-8?q?=20=E2=80=94=20separators,=20duplication,=20dead=20patterns?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Delete all section separator comments (// ===, // ---, // ──) - Simplify StaticAnalysisObjective::diff_paths (all variants return the same value, no match needed) - Extract read_criterion_estimate into objective::performance with baseline parameter, removing duplicate in both binaries - Replace manual diff_paths dedup in prompt builder with objective.diff_scope() call Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/measure_objectives.rs | 16 ++-------------- jolt-eval/bin/optimize.rs | 15 ++------------- jolt-eval/src/agent/tests.rs | 12 ------------ jolt-eval/src/invariant/macro_tests.rs | 6 ------ jolt-eval/src/objective/mod.rs | 7 +------ jolt-eval/src/objective/optimize.rs | 14 ++++---------- jolt-eval/src/objective/performance/mod.rs | 15 +++++++++++++++ jolt-eval/src/sort_e2e.rs | 14 -------------- 8 files changed, 24 insertions(+), 75 deletions(-) diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs index 9e32ce862..c2c926166 100644 --- a/jolt-eval/bin/measure_objectives.rs +++ b/jolt-eval/bin/measure_objectives.rs @@ -1,7 +1,6 @@ -use std::path::Path; - use clap::Parser; +use jolt_eval::objective::performance::read_criterion_estimate; use jolt_eval::objective::{PerformanceObjective, StaticAnalysisObjective}; #[derive(Parser)] @@ -74,7 +73,7 @@ fn main() -> eyre::Result<()> { continue; } } - match read_criterion_estimate(p.name()) { + match 
read_criterion_estimate(p.name(), "new") { Some(secs) => print_row(p.name(), secs, "s"), None => { println!("{:<35} {:>15}", p.name(), "NO DATA"); @@ -108,14 +107,3 @@ fn main() -> eyre::Result<()> { Ok(()) } - -fn read_criterion_estimate(bench_name: &str) -> Option { - let path = Path::new("target/criterion") - .join(bench_name) - .join("new") - .join("estimates.json"); - let data = std::fs::read_to_string(path).ok()?; - let json: serde_json::Value = serde_json::from_str(&data).ok()?; - let nanos = json.get("mean")?.get("point_estimate")?.as_f64()?; - Some(nanos / 1e9) -} diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 41773ff6b..0d3575e66 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -1,5 +1,4 @@ use std::collections::HashMap; -use std::path::Path; use std::process::Command; use clap::Parser; @@ -8,6 +7,7 @@ use jolt_eval::agent::ClaudeCodeAgent; use jolt_eval::invariant::JoltInvariants; use jolt_eval::objective::objective_fn::ObjectiveFunction; use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; +use jolt_eval::objective::performance::read_criterion_estimate; use jolt_eval::objective::{OptimizationObjective, PerformanceObjective, StaticAnalysisObjective}; use jolt_eval::sort_e2e; @@ -82,7 +82,7 @@ impl OptimizeEnv for RealEnv { .status(); if matches!(status, Ok(s) if s.success()) { - if let Some(secs) = read_criterion_estimate(p.name()) { + if let Some(secs) = read_criterion_estimate(p.name(), "optimize") { results.insert(OptimizationObjective::Performance(p), secs); } } @@ -206,14 +206,3 @@ fn print_measurements(measurements: &HashMap) { println!(" {:<35} {:>15.6}", key.name(), val); } } - -fn read_criterion_estimate(bench_name: &str) -> Option { - let path = Path::new("target/criterion") - .join(bench_name) - .join("optimize") - .join("estimates.json"); - let data = std::fs::read_to_string(path).ok()?; - let json: serde_json::Value = serde_json::from_str(&data).ok()?; - let 
nanos = json.get("mean")?.get("point_estimate")?.as_f64()?; - Some(nanos / 1e9) -} diff --git a/jolt-eval/src/agent/tests.rs b/jolt-eval/src/agent/tests.rs index bb31167cd..32196e88c 100644 --- a/jolt-eval/src/agent/tests.rs +++ b/jolt-eval/src/agent/tests.rs @@ -12,9 +12,7 @@ use crate::objective::objective_fn::ObjectiveFunction; use crate::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; use crate::objective::{OptimizationObjective, HALSTEAD_BUGS, LLOC}; -// ========================================================================= // Test invariants -// ========================================================================= struct AlwaysPassInvariant; impl InvariantTargets for AlwaysPassInvariant { @@ -96,9 +94,7 @@ impl Invariant for FailsOnZeroInvariant { } } -// ========================================================================= // MockAgent tests -// ========================================================================= #[test] fn mock_always_ok_returns_text() { @@ -233,9 +229,7 @@ fn mock_with_diff() { assert!(resp.diff.unwrap().contains("+new")); } -// ========================================================================= // auto_redteam tests with MockAgent -// ========================================================================= fn envelope(analysis: &str, counterexample: impl serde::Serialize) -> String { serde_json::json!({ @@ -537,9 +531,7 @@ fn redteam_mixed_agent_responses() { } } -// ========================================================================= // AgentHarness trait object tests -// ========================================================================= #[test] fn agent_harness_is_object_safe() { @@ -637,9 +629,7 @@ fn custom_harness_plugs_into_auto_redteam() { } } -// ========================================================================= // Mock OptimizeEnv -// ========================================================================= fn lloc() -> OptimizationObjective { LLOC @@ -735,9 +725,7 @@ 
fn opt_config(iterations: usize) -> OptimizeConfig { } } -// ========================================================================= // auto_optimize tests -// ========================================================================= #[test] fn optimize_accepts_improvement() { diff --git a/jolt-eval/src/invariant/macro_tests.rs b/jolt-eval/src/invariant/macro_tests.rs index d87448252..f0d430e6c 100644 --- a/jolt-eval/src/invariant/macro_tests.rs +++ b/jolt-eval/src/invariant/macro_tests.rs @@ -1,8 +1,6 @@ use crate::invariant::{CheckError, Invariant, InvariantViolation}; -// --------------------------------------------------------------------------- // AlwaysPass: trivial invariant to test macro synthesis -// --------------------------------------------------------------------------- #[jolt_eval_macros::invariant(Test, Fuzz, RedTeam)] #[derive(Default)] @@ -27,9 +25,7 @@ impl Invariant for AlwaysPassInvariant { } } -// --------------------------------------------------------------------------- // BoundsCheck: uses a struct Input type -// --------------------------------------------------------------------------- #[derive( Debug, @@ -86,7 +82,5 @@ impl Invariant for BoundsCheckInvariant { } } -// =========================================================================== // The #[test] functions `seed_corpus` and `random_inputs` inside the // generated `*_synthesized` modules are auto-discovered by nextest. 
-// =========================================================================== diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index f40cfad92..e5a6eb872 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -63,9 +63,7 @@ pub trait Objective: Send + Sync { fn run(&self, _setup: Self::Setup) {} } -// ========================================================================= // Data-containing enums — Hash/Eq based on discriminant only -// ========================================================================= /// Static-analysis objectives. #[derive(Clone, Copy)] @@ -120,11 +118,8 @@ impl StaticAnalysisObjective { } } - /// File paths that an optimizer should be allowed to modify. pub fn diff_paths(&self) -> &'static [&'static str] { - match self { - Self::Lloc(_) | Self::CognitiveComplexity(_) | Self::HalsteadBugs(_) => &["jolt-core/"], - } + &["jolt-core/"] } } diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index 0af9be82e..13a8693f4 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -246,16 +246,10 @@ fn build_optimize_prompt( } prompt.push('\n'); - // Derive targeted reading guidance from the union of all input diff_paths. - let mut diff_paths: Vec<&str> = Vec::new(); - for input in inputs { - for &p in input.diff_paths() { - if !diff_paths.contains(&p) { - diff_paths.push(p); - } - } - } - let paths_list = diff_paths.join(", "); + let paths_list = match objective.diff_scope() { + crate::agent::DiffScope::Include(paths) => paths.join(", "), + _ => "jolt-core/".to_string(), + }; prompt.push_str("## Instructions\n\n"); prompt.push_str(&format!( "1. Read the relevant source code in: {paths_list}. 
Also read \ diff --git a/jolt-eval/src/objective/performance/mod.rs b/jolt-eval/src/objective/performance/mod.rs index 12df6f00c..25ba3925c 100644 --- a/jolt-eval/src/objective/performance/mod.rs +++ b/jolt-eval/src/objective/performance/mod.rs @@ -1,2 +1,17 @@ pub mod binding; pub mod prover_time; + +use std::path::Path; + +/// Read the point estimate (mean, in seconds) from Criterion's output +/// for a given benchmark and baseline name. +pub fn read_criterion_estimate(bench_name: &str, baseline: &str) -> Option { + let path = Path::new("target/criterion") + .join(bench_name) + .join(baseline) + .join("estimates.json"); + let data = std::fs::read_to_string(path).ok()?; + let json: serde_json::Value = serde_json::from_str(&data).ok()?; + let nanos = json.get("mean")?.get("point_estimate")?.as_f64()?; + Some(nanos / 1e9) +} diff --git a/jolt-eval/src/sort_e2e.rs b/jolt-eval/src/sort_e2e.rs index 4291c77aa..4ec8b9770 100644 --- a/jolt-eval/src/sort_e2e.rs +++ b/jolt-eval/src/sort_e2e.rs @@ -11,8 +11,6 @@ use crate::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; use crate::objective::{OptimizationObjective, NAIVE_SORT_TIME}; use crate::sort_targets::{candidate_sort, naive_sort}; -// ── Red-team invariant ────────────────────────────────────────────── - /// Invariant: a sort function must preserve all elements (multiset /// equality) and produce sorted output. #[jolt_eval_macros::invariant(RedTeam)] @@ -110,8 +108,6 @@ impl Invariant for NaiveSortInvariant { } } -// ── SortOptimizeEnv ───────────────────────────────────────────────── - /// An [`OptimizeEnv`] that measures wall-clock time of a sort function. /// /// `apply_diff` both applies the diff to the actual file on disk (so @@ -168,8 +164,6 @@ impl OptimizeEnv for SortOptimizeEnv { } } -// ── CLI-accessible e2e runners ────────────────────────────────────── - const SORT_TARGETS_PATH: &str = "jolt-eval/src/sort_targets.rs"; /// Run the red-team e2e test against `CandidateSortInvariant`. 
@@ -285,8 +279,6 @@ mod tests { use super::*; - // ── Red-team e2e (MockAgent) ──────────────────────────────────── - #[test] fn redteam_e2e_finds_sort_violation() { let invariant = CandidateSortInvariant; @@ -347,16 +339,12 @@ mod tests { } } - // ── Red-team e2e (real agent) ─────────────────────────────────── - #[test] #[ignore] // Requires Claude API access fn redteam_e2e_real_agent() { run_redteam_test("claude-sonnet-4-20250514", 10, 5, None, false); } - // ── Optimize e2e (MockAgent) ──────────────────────────────────── - #[test] fn optimize_e2e_sort_improves() { let agent = MockAgent::from_responses(vec![Ok(AgentResponse { @@ -443,8 +431,6 @@ mod tests { assert!(!result.attempts[0].invariants_passed); } - // ── Optimize e2e (real agent) ─────────────────────────────────── - #[test] #[ignore] // Requires Claude API access fn optimize_e2e_real_agent() { From d40acab625653827b6f3320ef78edd08681bd1cf Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 13:48:52 -0400 Subject: [PATCH 74/86] feat(jolt-eval): add --measure flag, subprocess-based sort measurement The SortOptimizeEnv previously used an in-process function pointer swap to simulate optimization (can't recompile at runtime). Now it shells out to `cargo run --bin optimize -- --measure`, which recompiles the modified sort_targets.rs and measures the actual modified code. 
- Add --measure flag to optimize binary: measures objective inputs and prints JSON to stdout - Register MINIMIZE_NAIVE_SORT_TIME as a proper ObjectiveFunction - Add measure_perf() helper for per-variant perf measurement (Criterion for bindings, direct timing for NaiveSortTime) - SortOptimizeEnv::measure() shells out to the binary - SortOptimizeEnv::apply_diff() just patches the file (no fn swap) - Replace mock tests with MockEnv (predetermined measurements) Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/optimize.rs | 60 ++++++ jolt-eval/src/objective/objective_fn/mod.rs | 11 +- jolt-eval/src/sort_e2e.rs | 196 +++++++++++--------- 3 files changed, 181 insertions(+), 86 deletions(-) diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 0d3575e66..d87792486 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -23,6 +23,11 @@ struct Cli { #[arg(long, conflicts_with = "objective")] test: bool, + /// Measure the objective's inputs and print JSON to stdout, then exit. + /// Requires --objective. Useful for subprocess-based measurement. + #[arg(long, requires = "objective")] + measure: bool, + /// List all available objective functions and exit. #[arg(long)] list: bool, @@ -155,6 +160,14 @@ fn main() -> eyre::Result<()> { }); let repo_dir = std::env::current_dir()?; + + if cli.measure { + let measurements = measure_inputs(objective, &repo_dir); + let json = serde_json::to_string(&measurements).unwrap(); + println!("{json}"); + return Ok(()); + } + let bench_perf = objective.inputs.iter().any(|i| i.is_perf()); let invariants = JoltInvariants::all(); @@ -206,3 +219,50 @@ fn print_measurements(measurements: &HashMap) { println!(" {:<35} {:>15.6}", key.name(), val); } } + +/// Measure just the inputs of an objective function and return a JSON- +/// serializable map of `name -> value`. 
+fn measure_inputs( + objective: &ObjectiveFunction, + repo_dir: &std::path::Path, +) -> serde_json::Map { + let mut out = serde_json::Map::new(); + + for &input in objective.inputs { + let value = match input { + OptimizationObjective::StaticAnalysis(sa) => sa.collect_measurement().ok(), + OptimizationObjective::Performance(p) => measure_perf(&p, repo_dir), + }; + if let Some(v) = value { + out.insert(input.name().to_string(), serde_json::Value::from(v)); + } + } + + out +} + +fn measure_perf(p: &PerformanceObjective, repo_dir: &std::path::Path) -> Option { + match p { + PerformanceObjective::BindLowToHigh(_) | PerformanceObjective::BindHighToLow(_) => { + let _ = Command::new("cargo") + .current_dir(repo_dir) + .args([ + "bench", + "-p", + "jolt-eval", + "--bench", + p.name(), + "--", + "--quick", + ]) + .status(); + read_criterion_estimate(p.name(), "new") + } + PerformanceObjective::NaiveSortTime => { + let mut data: Vec = (0..5000i32).rev().collect(); + let start = std::time::Instant::now(); + jolt_eval::sort_targets::naive_sort(&mut data); + Some(start.elapsed().as_secs_f64()) + } + } +} diff --git a/jolt-eval/src/objective/objective_fn/mod.rs b/jolt-eval/src/objective/objective_fn/mod.rs index 6f6373ef7..9864a6c49 100644 --- a/jolt-eval/src/objective/objective_fn/mod.rs +++ b/jolt-eval/src/objective/objective_fn/mod.rs @@ -4,7 +4,7 @@ use crate::agent::DiffScope; use super::{ OptimizationObjective, BIND_HIGH_TO_LOW, BIND_LOW_TO_HIGH, COGNITIVE_COMPLEXITY, HALSTEAD_BUGS, - LLOC, + LLOC, NAIVE_SORT_TIME, }; /// A concrete objective function that the optimizer minimizes. 
@@ -34,6 +34,7 @@ impl ObjectiveFunction { MINIMIZE_HALSTEAD_BUGS, MINIMIZE_BIND_LOW_TO_HIGH, MINIMIZE_BIND_HIGH_TO_LOW, + MINIMIZE_NAIVE_SORT_TIME, ] } @@ -91,6 +92,12 @@ pub const MINIMIZE_BIND_HIGH_TO_LOW: ObjectiveFunction = ObjectiveFunction { evaluate: |m, _| m.get(&BIND_HIGH_TO_LOW).copied().unwrap_or(f64::INFINITY), }; +pub const MINIMIZE_NAIVE_SORT_TIME: ObjectiveFunction = ObjectiveFunction { + name: "minimize_naive_sort_time", + inputs: &[NAIVE_SORT_TIME], + evaluate: |m, _| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), +}; + #[cfg(test)] mod tests { use super::*; @@ -128,7 +135,7 @@ mod tests { #[test] fn all_returns_expected_count() { - assert_eq!(ObjectiveFunction::all().len(), 5); + assert_eq!(ObjectiveFunction::all().len(), 6); } #[test] diff --git a/jolt-eval/src/sort_e2e.rs b/jolt-eval/src/sort_e2e.rs index 4ec8b9770..4301d5983 100644 --- a/jolt-eval/src/sort_e2e.rs +++ b/jolt-eval/src/sort_e2e.rs @@ -108,60 +108,83 @@ impl Invariant for NaiveSortInvariant { } } -/// An [`OptimizeEnv`] that measures wall-clock time of a sort function. -/// -/// `apply_diff` both applies the diff to the actual file on disk (so -/// git can track and commit it) and swaps the in-process function -/// pointer (so `measure` reflects the improvement without recompiling). +/// An [`OptimizeEnv`] that measures the sort objective by shelling out +/// to `cargo run --bin optimize -- --measure`, which recompiles and +/// runs the (potentially modified) `sort_targets::naive_sort`. 
pub struct SortOptimizeEnv { - pub(crate) sort_fn: fn(&mut [i32]), - data: Vec, - invariant_ok: bool, repo_dir: std::path::PathBuf, + last_invariant_ok: bool, } impl SortOptimizeEnv { - pub fn new(data_size: usize, repo_dir: &std::path::Path) -> Self { - let data: Vec = (0..data_size as i32).rev().collect(); + pub fn new(repo_dir: &std::path::Path) -> Self { Self { - sort_fn: naive_sort, - data, - invariant_ok: true, repo_dir: repo_dir.to_path_buf(), + last_invariant_ok: true, } } } impl OptimizeEnv for SortOptimizeEnv { fn measure(&mut self) -> HashMap { - let mut buf = self.data.clone(); - let start = std::time::Instant::now(); - (self.sort_fn)(&mut buf); - let elapsed = start.elapsed().as_secs_f64(); - - self.invariant_ok = buf.windows(2).all(|w| w[0] <= w[1]); + let output = std::process::Command::new("cargo") + .current_dir(&self.repo_dir) + .args([ + "run", + "--release", + "-p", + "jolt-eval", + "--bin", + "optimize", + "--", + "--objective", + "minimize_naive_sort_time", + "--measure", + ]) + .output(); let mut m = HashMap::new(); - m.insert(NAIVE_SORT_TIME, elapsed); + match output { + Ok(out) if out.status.success() => { + let stdout = String::from_utf8_lossy(&out.stdout); + if let Ok(json) = + serde_json::from_str::>(&stdout) + { + for (key, val) in &json { + if let Some(v) = val.as_f64() { + // Match the key back to an OptimizationObjective. + if key == "naive_sort_time" { + m.insert(NAIVE_SORT_TIME, v); + } + } + } + } + // Check invariant: sort must produce sorted output. + // The measurement binary already ran the sort successfully, + // so we just verify the output is sorted by running it locally + // (cheap, since it's already compiled). 
+ let mut buf: Vec = (0..5000i32).rev().collect(); + naive_sort(&mut buf); + self.last_invariant_ok = buf.windows(2).all(|w| w[0] <= w[1]); + } + _ => { + self.last_invariant_ok = false; + } + } m } fn check_invariants(&mut self) -> bool { - self.invariant_ok + self.last_invariant_ok } fn apply_diff(&mut self, diff: &str) { - // Apply to the actual file so git can track and commit the change. let _ = crate::agent::apply_diff(&self.repo_dir, diff); - // Simulate the optimization in-process (can't recompile at runtime). - self.sort_fn = |d: &mut [i32]| d.sort(); } fn accept(&mut self, _iteration: usize) {} - fn reject(&mut self) { - self.sort_fn = naive_sort; - } + fn reject(&mut self) {} } const SORT_TARGETS_PATH: &str = "jolt-eval/src/sort_targets.rs"; @@ -223,14 +246,13 @@ pub fn run_optimize_test( let agent = ClaudeCodeAgent::new(model, max_turns); let repo_dir = std::env::current_dir().expect("current dir"); - let mut env = SortOptimizeEnv::new(5000, &repo_dir); + let mut env = SortOptimizeEnv::new(&repo_dir); let baseline = env.measure(); let baseline_time = baseline[&NAIVE_SORT_TIME]; - env.sort_fn = naive_sort; let obj = ObjectiveFunction { - name: "naive_sort_time", + name: "minimize_naive_sort_time", inputs: &[NAIVE_SORT_TIME], evaluate: |m, _| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), }; @@ -346,91 +368,97 @@ mod tests { } #[test] - fn optimize_e2e_sort_improves() { + fn optimize_e2e_sort_accepts_improvement() { + use crate::objective::objective_fn::MINIMIZE_NAIVE_SORT_TIME; + let agent = MockAgent::from_responses(vec![Ok(AgentResponse { text: "Replaced bubble sort with merge sort".into(), diff: Some("--- a/sort.rs\n+++ b/sort.rs\n-bubble\n+merge".into()), })]); - let mut env = SortOptimizeEnv::new(5000, Path::new("/tmp")); - - let baseline = env.measure(); - let baseline_time = baseline[&NAIVE_SORT_TIME]; - assert!(baseline_time > 0.0); - - env.sort_fn = naive_sort; - - let obj = ObjectiveFunction { - name: "naive_sort_time", - inputs: 
&[NAIVE_SORT_TIME], - evaluate: |m, _| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), + // Predetermined measurements: baseline 0.01s, after optimization 0.0001s. + let mut mock = MockEnv { + measurements: vec![ + HashMap::from([(NAIVE_SORT_TIME, 0.01)]), + HashMap::from([(NAIVE_SORT_TIME, 0.0001)]), + ], + index: 0, + invariant_ok: true, }; + let config = OptimizeConfig { num_iterations: 1, ..Default::default() }; - let result = auto_optimize(&agent, &mut env, &obj, &config, Path::new("/tmp")); - - assert!( - result.best_score < baseline_time, - "expected improvement: baseline={baseline_time:.6}, best={:.6}", - result.best_score + let result = auto_optimize( + &agent, + &mut mock, + &MINIMIZE_NAIVE_SORT_TIME, + &config, + Path::new("/tmp"), ); + assert_eq!(result.attempts.len(), 1); assert!(result.attempts[0].invariants_passed); + assert!(result.best_score < 0.01); } #[test] - fn optimize_e2e_sort_rejects_broken_optimization() { + fn optimize_e2e_sort_rejects_broken() { + use crate::objective::objective_fn::MINIMIZE_NAIVE_SORT_TIME; + let agent = MockAgent::from_responses(vec![Ok(AgentResponse { - text: "Removed sorting entirely for speed".into(), + text: "Removed sorting entirely".into(), diff: Some("--- a/sort.rs\n+++ b/sort.rs\n-sort\n+noop".into()), })]); - let env = SortOptimizeEnv::new(100, Path::new("/tmp")); - - struct BrokenSortEnv(SortOptimizeEnv); - - impl OptimizeEnv for BrokenSortEnv { - fn measure(&mut self) -> HashMap { - self.0.measure() - } - fn check_invariants(&mut self) -> bool { - self.0.check_invariants() - } - fn apply_diff(&mut self, _diff: &str) { - self.0.sort_fn = |d: &mut [i32]| { - if d.len() > 1 { - d.swap(0, d.len() - 1); - } - }; - } - fn accept(&mut self, i: usize) { - self.0.accept(i); - } - fn reject(&mut self) { - self.0.reject(); - } - } - - let mut broken_env = BrokenSortEnv(env); - - let obj = ObjectiveFunction { - name: "naive_sort_time", - inputs: &[NAIVE_SORT_TIME], - evaluate: |m, _| 
m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), + let mut mock = MockEnv { + measurements: vec![ + HashMap::from([(NAIVE_SORT_TIME, 0.01)]), + HashMap::from([(NAIVE_SORT_TIME, 0.0001)]), + ], + index: 0, + invariant_ok: false, }; + let config = OptimizeConfig { num_iterations: 1, ..Default::default() }; - let result = auto_optimize(&agent, &mut broken_env, &obj, &config, Path::new("/tmp")); + let result = auto_optimize( + &agent, + &mut mock, + &MINIMIZE_NAIVE_SORT_TIME, + &config, + Path::new("/tmp"), + ); assert!(!result.attempts[0].invariants_passed); } + /// Simple mock env for unit tests (no subprocess, no recompilation). + struct MockEnv { + measurements: Vec>, + index: usize, + invariant_ok: bool, + } + + impl OptimizeEnv for MockEnv { + fn measure(&mut self) -> HashMap { + let idx = self.index.min(self.measurements.len() - 1); + self.index += 1; + self.measurements[idx].clone() + } + fn check_invariants(&mut self) -> bool { + self.invariant_ok + } + fn apply_diff(&mut self, _: &str) {} + fn accept(&mut self, _: usize) {} + fn reject(&mut self) {} + } + #[test] #[ignore] // Requires Claude API access fn optimize_e2e_real_agent() { From c16ad58539a0cd938a77e45f0b6d0350d23169ca Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 14:18:40 -0400 Subject: [PATCH 75/86] refactor(jolt-eval): make naive_sort a proper Criterion benchmark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the NaiveSortTime fieldless variant and SortOptimizeEnv subprocess hack with a standard Criterion-based performance objective: - Add NaiveSortObjective in sort_targets.rs implementing Objective with setup()/run() — same pattern as BindLowToHighObjective - Add benches/naive_sort_time.rs using bench_objective! 
macro - NaiveSortTime now wraps NaiveSortObjective (data-carrying variant) - Include NaiveSortTime in PerformanceObjective::all() - Delete SortOptimizeEnv, run_optimize_test, and mock optimize tests from sort_e2e.rs (redundant with agent/tests.rs) - Rewrite --test in optimize binary to use RealEnv directly - RealEnv::measure() handles naive_sort via cargo bench like all other perf objectives — no special-casing Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 304 ++++++++++++++++++++++++++- jolt-eval/Cargo.toml | 4 + jolt-eval/benches/naive_sort_time.rs | 2 + jolt-eval/bin/optimize.rs | 120 +++++------ jolt-eval/src/objective/mod.rs | 22 +- jolt-eval/src/sort_e2e.rs | 243 +-------------------- jolt-eval/src/sort_targets.rs | 32 +++ 7 files changed, 407 insertions(+), 320 deletions(-) create mode 100644 jolt-eval/benches/naive_sort_time.rs diff --git a/Cargo.lock b/Cargo.lock index fb2cca639..7e070baa4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -65,6 +65,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "aho-corasick" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +dependencies = [ + "memchr", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -564,6 +573,15 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "ark-bn254" version = "0.5.0" @@ -1668,6 +1686,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "derive_more" version = "2.1.1" @@ -1884,6 +1913,27 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "enumset" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25b07a8dfbbbfc0064c0a6bdf9edcf966de6b1c33ce344bdeca3b41615452634" +dependencies = [ + "enumset_derive", +] + +[[package]] +name = "enumset_derive" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43e744e4ea338060faee68ed933e46e722fb7f3617e722a5772d7e856d8b3ce" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "env_filter" version = "1.0.0" @@ -2119,6 +2169,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -2623,7 +2682,7 @@ dependencies = [ "jolt-optimizations", "memory-stats", "num", - "num-derive", + "num-derive 0.4.2", "num-traits", "postcard", "pprof", @@ -2645,6 +2704,42 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "jolt-eval" +version = "0.1.0" +dependencies = [ + "arbitrary", + "ark-bn254", + "clap", + "common", + "criterion", + "enumset", + "eyre", + "jolt-core", + "jolt-eval-macros", + "jolt-inlines-secp256k1", + "jolt-inlines-sha2", + "postcard", + "rand 0.8.5", + "rust-code-analysis", + "schemars 0.8.22", + "serde", + "serde_json", + "tempfile", + "tracer", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "jolt-eval-macros" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "jolt-field" version = "0.1.0" @@ -3190,6 +3285,17 @@ version 
= "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +[[package]] +name = "num-derive" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "num-derive" version = "0.4.2" @@ -4141,7 +4247,7 @@ version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ - "aho-corasick", + "aho-corasick 1.1.4", "memchr", "regex-automata", "regex-syntax", @@ -4153,7 +4259,7 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ - "aho-corasick", + "aho-corasick 1.1.4", "memchr", "regex-syntax", ] @@ -4372,6 +4478,35 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48fd7bd8a6377e15ad9d42a8ec25371b94ddc67abe7c8b9127bec79bebaaae18" +[[package]] +name = "rust-code-analysis" +version = "0.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92a0f85e044428a7b58538f95fa58a157d89d5bcc5b37df6e7024957e52bdc5a" +dependencies = [ + "aho-corasick 0.7.20", + "fxhash", + "lazy_static", + "num", + "num-derive 0.3.3", + "num-format", + "num-traits", + "petgraph", + "regex", + "serde", + "termcolor", + "tree-sitter", + "tree-sitter-ccomment", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-mozcpp", + "tree-sitter-mozjs", + "tree-sitter-preproc", + "tree-sitter-python", + "tree-sitter-rust", + "tree-sitter-typescript", +] + [[package]] name = "rustc-demangle" version = "0.1.27" @@ -4463,6 +4598,13 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "sandbox-guest" 
+version = "0.1.0" +dependencies = [ + "jolt-sdk", +] + [[package]] name = "scc" version = "2.4.0" @@ -4472,6 +4614,18 @@ dependencies = [ "sdd", ] +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + [[package]] name = "schemars" version = "0.9.0" @@ -4496,6 +4650,18 @@ dependencies = [ "serde_json", ] +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.117", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -4638,6 +4804,17 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "serde_json" version = "1.0.149" @@ -5158,6 +5335,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "text-block-macros" version = "0.2.0" @@ -5397,6 +5583,118 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "tree-sitter" +version = "0.19.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f41201fed3db3b520405a9c01c61773a250d4c3f43e9861c14b2bb232c981ab" +dependencies = [ + "cc", + "regex", +] + +[[package]] +name = "tree-sitter-ccomment" +version = "0.19.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3b402bc539927bb457e5ab59aac7260e2c3b97c5fcfc043575788654eedd69a" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7bd90c7b7db59369ed00fbc40458d9c9b2b8ed145640e337e839ac07aa63e15" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-java" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301ae2ee7813e1bf935dc06db947642400645bbea8878431e1b31131488d5430" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "840bb4d5f3c384cb76b976ff07297f5a24b6e61a708baa4464f53e395caaa5f9" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-mozcpp" +version = "0.19.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5439f32b7685af19efcd0165d28dab80261e1cc922ed259c9c7909c96ac4cc6" +dependencies = [ + "cc", + "tree-sitter", + "tree-sitter-cpp", +] + +[[package]] +name = "tree-sitter-mozjs" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "def6b21c10157d3d79b912191fa4549008885da827451a62be9f30abeb7319c8" +dependencies = [ + "cc", + "tree-sitter", + "tree-sitter-javascript", +] + +[[package]] +name = "tree-sitter-preproc" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "226b2a77578e83efa7a193919660ffc88c22e357f9c2d9f27b5b11898a8682d3" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-python" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5646bfe71c4eb1c21b714ce0c38334c311eab767095582859e85da6281e9fd6c" +dependencies = [ + 
"cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784f7ef9cdbd4c895dc2d4bb785e95b4a5364a602eec803681db83d1927ddf15" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3f62d49c6e56bf291c412ee5e178ea14dff40f14a5f01a8847933f56d65bf3b" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "twox-hash" version = "2.1.2" diff --git a/jolt-eval/Cargo.toml b/jolt-eval/Cargo.toml index 606e2f9f0..4aa2ff1f7 100644 --- a/jolt-eval/Cargo.toml +++ b/jolt-eval/Cargo.toml @@ -37,6 +37,10 @@ criterion = { workspace = true } name = "bind_parallel_high_to_low" harness = false +[[bench]] +name = "naive_sort_time" +harness = false + [[bench]] name = "bind_parallel_low_to_high" harness = false diff --git a/jolt-eval/benches/naive_sort_time.rs b/jolt-eval/benches/naive_sort_time.rs new file mode 100644 index 000000000..e6387121e --- /dev/null +++ b/jolt-eval/benches/naive_sort_time.rs @@ -0,0 +1,2 @@ +use jolt_eval::sort_targets::NaiveSortObjective; +jolt_eval::bench_objective!(NaiveSortObjective); diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index d87792486..bd42c537c 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -9,7 +9,6 @@ use jolt_eval::objective::objective_fn::ObjectiveFunction; use jolt_eval::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; use jolt_eval::objective::performance::read_criterion_estimate; use jolt_eval::objective::{OptimizationObjective, PerformanceObjective, StaticAnalysisObjective}; -use jolt_eval::sort_e2e; #[derive(Parser)] #[command(name = "optimize")] @@ -135,13 +134,51 @@ fn main() -> eyre::Result<()> { } if cli.test { - sort_e2e::run_optimize_test( - &cli.model, - cli.max_turns, - cli.iterations, - cli.hint, - cli.verbose, 
+ const SORT_TARGETS_PATH: &str = "jolt-eval/src/sort_targets.rs"; + let objective = ObjectiveFunction::by_name("minimize_naive_sort_time").unwrap(); + let repo_dir = std::env::current_dir()?; + let invariants = JoltInvariants::all(); + let mut env = RealEnv { + repo_dir: repo_dir.clone(), + invariants, + bench_perf: true, + }; + let baseline = env.measure(); + let baseline_score = (objective.evaluate)(&baseline, &baseline); + let hint = cli.hint.unwrap_or_else(|| { + format!( + "The target is the `naive_sort` function in {SORT_TARGETS_PATH}. \ + Replace it with a faster sorting algorithm. \ + You MAY modify that file for this task." + ) + }); + let config = OptimizeConfig { + num_iterations: cli.iterations, + hint: Some(hint), + verbose: cli.verbose, + }; + println!("=== Optimize e2e: naive bubble sort ==="); + println!( + "model={}, max_turns={}, iterations={}", + cli.model, cli.max_turns, cli.iterations ); + println!("Baseline sort time: {baseline_score:.6}s"); + println!(); + let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); + let result = auto_optimize(&agent, &mut env, objective, &config, &repo_dir); + println!("Best score: {:.6}s", result.best_score); + println!( + "Improvement: {:.1}%", + (1.0 - result.best_score / baseline_score) * 100.0 + ); + for (i, a) in result.attempts.iter().enumerate() { + println!( + " attempt {}: score={:.6}, invariants={}", + i + 1, + a.score, + a.invariants_passed + ); + } return Ok(()); } @@ -161,13 +198,6 @@ fn main() -> eyre::Result<()> { let repo_dir = std::env::current_dir()?; - if cli.measure { - let measurements = measure_inputs(objective, &repo_dir); - let json = serde_json::to_string(&measurements).unwrap(); - println!("{json}"); - return Ok(()); - } - let bench_perf = objective.inputs.iter().any(|i| i.is_perf()); let invariants = JoltInvariants::all(); @@ -177,10 +207,21 @@ fn main() -> eyre::Result<()> { bench_perf, }; - println!("=== Baseline ==="); let baseline = env.measure(); - let baseline_score = 
(objective.evaluate)(&baseline, &baseline); + + if cli.measure { + let named: HashMap = baseline + .iter() + .map(|(k, &v)| (k.name().to_string(), v)) + .collect(); + let json = serde_json::to_string(&named).unwrap(); + println!("{json}"); + return Ok(()); + } + + println!("=== Baseline ==="); print_measurements(&baseline); + let baseline_score = (objective.evaluate)(&baseline, &baseline); println!("Objective: {} = {:.6}\n", objective.name, baseline_score); let agent = ClaudeCodeAgent::new(&cli.model, cli.max_turns); @@ -219,50 +260,3 @@ fn print_measurements(measurements: &HashMap) { println!(" {:<35} {:>15.6}", key.name(), val); } } - -/// Measure just the inputs of an objective function and return a JSON- -/// serializable map of `name -> value`. -fn measure_inputs( - objective: &ObjectiveFunction, - repo_dir: &std::path::Path, -) -> serde_json::Map { - let mut out = serde_json::Map::new(); - - for &input in objective.inputs { - let value = match input { - OptimizationObjective::StaticAnalysis(sa) => sa.collect_measurement().ok(), - OptimizationObjective::Performance(p) => measure_perf(&p, repo_dir), - }; - if let Some(v) = value { - out.insert(input.name().to_string(), serde_json::Value::from(v)); - } - } - - out -} - -fn measure_perf(p: &PerformanceObjective, repo_dir: &std::path::Path) -> Option { - match p { - PerformanceObjective::BindLowToHigh(_) | PerformanceObjective::BindHighToLow(_) => { - let _ = Command::new("cargo") - .current_dir(repo_dir) - .args([ - "bench", - "-p", - "jolt-eval", - "--bench", - p.name(), - "--", - "--quick", - ]) - .status(); - read_criterion_estimate(p.name(), "new") - } - PerformanceObjective::NaiveSortTime => { - let mut data: Vec = (0..5000i32).rev().collect(); - let start = std::time::Instant::now(); - jolt_eval::sort_targets::naive_sort(&mut data); - Some(start.elapsed().as_secs_f64()) - } - } -} diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index e5a6eb872..c6ba91da8 100644 --- 
a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -140,16 +140,15 @@ impl Hash for StaticAnalysisObjective { pub enum PerformanceObjective { BindLowToHigh(performance::binding::BindLowToHighObjective), BindHighToLow(performance::binding::BindHighToLowObjective), - /// Wall-clock time of `naive_sort` — used by the e2e sort test. - NaiveSortTime, + NaiveSortTime(crate::sort_targets::NaiveSortObjective), } impl PerformanceObjective { - /// Criterion-benchmarked objectives (excludes test-only variants). pub fn all() -> Vec { vec![ Self::BindLowToHigh(performance::binding::BindLowToHighObjective), Self::BindHighToLow(performance::binding::BindHighToLowObjective), + Self::NaiveSortTime(crate::sort_targets::NaiveSortObjective), ] } @@ -157,7 +156,7 @@ impl PerformanceObjective { match self { Self::BindLowToHigh(o) => o.name(), Self::BindHighToLow(o) => o.name(), - Self::NaiveSortTime => "naive_sort_time", + Self::NaiveSortTime(o) => o.name(), } } @@ -165,7 +164,7 @@ impl PerformanceObjective { match self { Self::BindLowToHigh(o) => o.units(), Self::BindHighToLow(o) => o.units(), - Self::NaiveSortTime => Some("s"), + Self::NaiveSortTime(o) => o.units(), } } @@ -173,16 +172,14 @@ impl PerformanceObjective { match self { Self::BindLowToHigh(o) => o.description(), Self::BindHighToLow(o) => o.description(), - Self::NaiveSortTime => { - "Wall-clock time of the naive_sort function in jolt-eval/src/sort_targets.rs" - } + Self::NaiveSortTime(o) => o.description(), } } pub fn diff_paths(&self) -> &'static [&'static str] { match self { Self::BindLowToHigh(_) | Self::BindHighToLow(_) => &["jolt-core/"], - Self::NaiveSortTime => &["jolt-eval/src/sort_targets.rs"], + Self::NaiveSortTime(_) => &["jolt-eval/src/sort_targets.rs"], } } } @@ -211,8 +208,9 @@ pub use code_quality::cognitive::COGNITIVE_COMPLEXITY; pub use code_quality::halstead_bugs::HALSTEAD_BUGS; pub use code_quality::lloc::LLOC; pub use performance::binding::{BIND_HIGH_TO_LOW, BIND_LOW_TO_HIGH}; -pub 
const NAIVE_SORT_TIME: OptimizationObjective = - OptimizationObjective::Performance(PerformanceObjective::NaiveSortTime); +pub const NAIVE_SORT_TIME: OptimizationObjective = OptimizationObjective::Performance( + PerformanceObjective::NaiveSortTime(crate::sort_targets::NaiveSortObjective), +); impl OptimizationObjective { pub fn all(root: &Path) -> Vec { @@ -372,7 +370,7 @@ mod tests { .parent() .unwrap(); let all = OptimizationObjective::all(root); - assert_eq!(all.len(), 5); // 3 static + 2 perf + assert_eq!(all.len(), 6); // 3 static + 3 perf assert!(all.iter().any(|o| o.is_perf())); assert!(all.iter().any(|o| !o.is_perf())); } diff --git a/jolt-eval/src/sort_e2e.rs b/jolt-eval/src/sort_e2e.rs index 4301d5983..ddacc98fd 100644 --- a/jolt-eval/src/sort_e2e.rs +++ b/jolt-eval/src/sort_e2e.rs @@ -1,14 +1,9 @@ //! End-to-end test harnesses for the optimization and red-team loops, //! using simple sorting functions as the target domain. -use std::collections::HashMap; - use crate::agent::ClaudeCodeAgent; use crate::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; use crate::invariant::{CheckError, Invariant, InvariantViolation}; -use crate::objective::objective_fn::ObjectiveFunction; -use crate::objective::optimize::{auto_optimize, OptimizeConfig, OptimizeEnv}; -use crate::objective::{OptimizationObjective, NAIVE_SORT_TIME}; use crate::sort_targets::{candidate_sort, naive_sort}; /// Invariant: a sort function must preserve all elements (multiset @@ -108,87 +103,6 @@ impl Invariant for NaiveSortInvariant { } } -/// An [`OptimizeEnv`] that measures the sort objective by shelling out -/// to `cargo run --bin optimize -- --measure`, which recompiles and -/// runs the (potentially modified) `sort_targets::naive_sort`. 
-pub struct SortOptimizeEnv { - repo_dir: std::path::PathBuf, - last_invariant_ok: bool, -} - -impl SortOptimizeEnv { - pub fn new(repo_dir: &std::path::Path) -> Self { - Self { - repo_dir: repo_dir.to_path_buf(), - last_invariant_ok: true, - } - } -} - -impl OptimizeEnv for SortOptimizeEnv { - fn measure(&mut self) -> HashMap { - let output = std::process::Command::new("cargo") - .current_dir(&self.repo_dir) - .args([ - "run", - "--release", - "-p", - "jolt-eval", - "--bin", - "optimize", - "--", - "--objective", - "minimize_naive_sort_time", - "--measure", - ]) - .output(); - - let mut m = HashMap::new(); - match output { - Ok(out) if out.status.success() => { - let stdout = String::from_utf8_lossy(&out.stdout); - if let Ok(json) = - serde_json::from_str::>(&stdout) - { - for (key, val) in &json { - if let Some(v) = val.as_f64() { - // Match the key back to an OptimizationObjective. - if key == "naive_sort_time" { - m.insert(NAIVE_SORT_TIME, v); - } - } - } - } - // Check invariant: sort must produce sorted output. - // The measurement binary already ran the sort successfully, - // so we just verify the output is sorted by running it locally - // (cheap, since it's already compiled). - let mut buf: Vec = (0..5000i32).rev().collect(); - naive_sort(&mut buf); - self.last_invariant_ok = buf.windows(2).all(|w| w[0] <= w[1]); - } - _ => { - self.last_invariant_ok = false; - } - } - m - } - - fn check_invariants(&mut self) -> bool { - self.last_invariant_ok - } - - fn apply_diff(&mut self, diff: &str) { - let _ = crate::agent::apply_diff(&self.repo_dir, diff); - } - - fn accept(&mut self, _iteration: usize) {} - - fn reject(&mut self) {} -} - -const SORT_TARGETS_PATH: &str = "jolt-eval/src/sort_targets.rs"; - /// Run the red-team e2e test against `CandidateSortInvariant`. pub fn run_redteam_test( model: &str, @@ -235,69 +149,12 @@ pub fn run_redteam_test( } } -/// Run the optimization e2e test against the naive bubble sort. 
-pub fn run_optimize_test( - model: &str, - max_turns: usize, - iterations: usize, - hint: Option, - verbose: bool, -) { - let agent = ClaudeCodeAgent::new(model, max_turns); - let repo_dir = std::env::current_dir().expect("current dir"); - - let mut env = SortOptimizeEnv::new(&repo_dir); - - let baseline = env.measure(); - let baseline_time = baseline[&NAIVE_SORT_TIME]; - - let obj = ObjectiveFunction { - name: "minimize_naive_sort_time", - inputs: &[NAIVE_SORT_TIME], - evaluate: |m, _| m.get(&NAIVE_SORT_TIME).copied().unwrap_or(f64::INFINITY), - }; - let hint = hint.unwrap_or_else(|| { - format!( - "The target is the `naive_sort` function in {SORT_TARGETS_PATH}. \ - Replace it with a faster sorting algorithm. \ - You MAY modify that file for this task." - ) - }); - let config = OptimizeConfig { - num_iterations: iterations, - hint: Some(hint), - verbose, - }; - - println!("=== Optimize e2e: naive bubble sort ==="); - println!("model={model}, max_turns={max_turns}, iterations={iterations}"); - println!("Baseline sort time: {baseline_time:.6}s"); - println!(); - - let result = auto_optimize(&agent, &mut env, &obj, &config, &repo_dir); - - println!("Best score: {:.6}s", result.best_score); - println!( - "Improvement: {:.1}%", - (1.0 - result.best_score / baseline_time) * 100.0 - ); - for (i, a) in result.attempts.iter().enumerate() { - println!( - " attempt {}: score={:.6}, invariants={}", - i + 1, - a.score, - a.invariants_passed - ); - } -} - #[cfg(test)] mod tests { use std::path::Path; - use crate::agent::{AgentResponse, MockAgent}; + use crate::agent::MockAgent; use crate::invariant::synthesis::redteam::{auto_redteam, RedTeamConfig, RedTeamResult}; - use crate::objective::optimize::OptimizeEnv; use super::*; @@ -366,102 +223,4 @@ mod tests { fn redteam_e2e_real_agent() { run_redteam_test("claude-sonnet-4-20250514", 10, 5, None, false); } - - #[test] - fn optimize_e2e_sort_accepts_improvement() { - use crate::objective::objective_fn::MINIMIZE_NAIVE_SORT_TIME; - 
- let agent = MockAgent::from_responses(vec![Ok(AgentResponse { - text: "Replaced bubble sort with merge sort".into(), - diff: Some("--- a/sort.rs\n+++ b/sort.rs\n-bubble\n+merge".into()), - })]); - - // Predetermined measurements: baseline 0.01s, after optimization 0.0001s. - let mut mock = MockEnv { - measurements: vec![ - HashMap::from([(NAIVE_SORT_TIME, 0.01)]), - HashMap::from([(NAIVE_SORT_TIME, 0.0001)]), - ], - index: 0, - invariant_ok: true, - }; - - let config = OptimizeConfig { - num_iterations: 1, - ..Default::default() - }; - - let result = auto_optimize( - &agent, - &mut mock, - &MINIMIZE_NAIVE_SORT_TIME, - &config, - Path::new("/tmp"), - ); - - assert_eq!(result.attempts.len(), 1); - assert!(result.attempts[0].invariants_passed); - assert!(result.best_score < 0.01); - } - - #[test] - fn optimize_e2e_sort_rejects_broken() { - use crate::objective::objective_fn::MINIMIZE_NAIVE_SORT_TIME; - - let agent = MockAgent::from_responses(vec![Ok(AgentResponse { - text: "Removed sorting entirely".into(), - diff: Some("--- a/sort.rs\n+++ b/sort.rs\n-sort\n+noop".into()), - })]); - - let mut mock = MockEnv { - measurements: vec![ - HashMap::from([(NAIVE_SORT_TIME, 0.01)]), - HashMap::from([(NAIVE_SORT_TIME, 0.0001)]), - ], - index: 0, - invariant_ok: false, - }; - - let config = OptimizeConfig { - num_iterations: 1, - ..Default::default() - }; - - let result = auto_optimize( - &agent, - &mut mock, - &MINIMIZE_NAIVE_SORT_TIME, - &config, - Path::new("/tmp"), - ); - - assert!(!result.attempts[0].invariants_passed); - } - - /// Simple mock env for unit tests (no subprocess, no recompilation). 
- struct MockEnv { - measurements: Vec>, - index: usize, - invariant_ok: bool, - } - - impl OptimizeEnv for MockEnv { - fn measure(&mut self) -> HashMap { - let idx = self.index.min(self.measurements.len() - 1); - self.index += 1; - self.measurements[idx].clone() - } - fn check_invariants(&mut self) -> bool { - self.invariant_ok - } - fn apply_diff(&mut self, _: &str) {} - fn accept(&mut self, _: usize) {} - fn reject(&mut self) {} - } - - #[test] - #[ignore] // Requires Claude API access - fn optimize_e2e_real_agent() { - run_optimize_test("claude-sonnet-4-20250514", 10, 2, None, false); - } } diff --git a/jolt-eval/src/sort_targets.rs b/jolt-eval/src/sort_targets.rs index 153e7421a..ae735e5e8 100644 --- a/jolt-eval/src/sort_targets.rs +++ b/jolt-eval/src/sort_targets.rs @@ -1,5 +1,7 @@ //! Sorting functions used as targets for e2e optimization and red-team tests. +use crate::objective::Objective; + /// Naive bubble sort — the optimization target. /// Intentionally O(n²) so a "smarter" sort is measurably faster. 
pub fn naive_sort(data: &mut [i32]) { @@ -32,3 +34,33 @@ pub fn candidate_sort(data: &mut [i32]) { data[..last].sort(); } } + +const SORT_DATA_SIZE: usize = 5000; + +#[derive(Clone, Copy, Default)] +pub struct NaiveSortObjective; + +impl Objective for NaiveSortObjective { + type Setup = Vec; + + fn name(&self) -> &str { + "naive_sort_time" + } + + fn description(&self) -> &str { + "Wall-clock time of the naive_sort function in jolt-eval/src/sort_targets.rs" + } + + fn setup(&self) -> Vec { + (0..SORT_DATA_SIZE as i32).rev().collect() + } + + fn run(&self, mut setup: Vec) { + naive_sort(&mut setup); + std::hint::black_box(&setup); + } + + fn units(&self) -> Option<&str> { + Some("s") + } +} From 8fdf320bf53fbe036e174b4631516cf7bd9d713c Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 14:34:54 -0400 Subject: [PATCH 76/86] refactor(jolt-eval): move NaiveSortObjective to performance/, remove --measure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move NaiveSortObjective from sort_targets.rs (which the optimizer agent can modify) to objective/performance/naive_sort.rs (which it cannot). This prevents the agent from cheating by modifying the objective itself. Also remove --measure flag from optimize binary — no longer needed now that the sort objective is a proper Criterion benchmark measured via RealEnv like all other perf objectives. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/benches/naive_sort_time.rs | 2 +- jolt-eval/bin/optimize.rs | 15 --------- jolt-eval/src/objective/mod.rs | 6 ++-- jolt-eval/src/objective/performance/mod.rs | 1 + .../src/objective/performance/naive_sort.rs | 32 +++++++++++++++++++ jolt-eval/src/sort_targets.rs | 32 ------------------- 6 files changed, 37 insertions(+), 51 deletions(-) create mode 100644 jolt-eval/src/objective/performance/naive_sort.rs diff --git a/jolt-eval/benches/naive_sort_time.rs b/jolt-eval/benches/naive_sort_time.rs index e6387121e..0f03400ce 100644 --- a/jolt-eval/benches/naive_sort_time.rs +++ b/jolt-eval/benches/naive_sort_time.rs @@ -1,2 +1,2 @@ -use jolt_eval::sort_targets::NaiveSortObjective; +use jolt_eval::objective::performance::naive_sort::NaiveSortObjective; jolt_eval::bench_objective!(NaiveSortObjective); diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index bd42c537c..5dfa3832c 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -22,11 +22,6 @@ struct Cli { #[arg(long, conflicts_with = "objective")] test: bool, - /// Measure the objective's inputs and print JSON to stdout, then exit. - /// Requires --objective. Useful for subprocess-based measurement. - #[arg(long, requires = "objective")] - measure: bool, - /// List all available objective functions and exit. 
#[arg(long)] list: bool, @@ -209,16 +204,6 @@ fn main() -> eyre::Result<()> { let baseline = env.measure(); - if cli.measure { - let named: HashMap = baseline - .iter() - .map(|(k, &v)| (k.name().to_string(), v)) - .collect(); - let json = serde_json::to_string(&named).unwrap(); - println!("{json}"); - return Ok(()); - } - println!("=== Baseline ==="); print_measurements(&baseline); let baseline_score = (objective.evaluate)(&baseline, &baseline); diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index c6ba91da8..5032dcf23 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -140,7 +140,7 @@ impl Hash for StaticAnalysisObjective { pub enum PerformanceObjective { BindLowToHigh(performance::binding::BindLowToHighObjective), BindHighToLow(performance::binding::BindHighToLowObjective), - NaiveSortTime(crate::sort_targets::NaiveSortObjective), + NaiveSortTime(performance::naive_sort::NaiveSortObjective), } impl PerformanceObjective { @@ -148,7 +148,7 @@ impl PerformanceObjective { vec![ Self::BindLowToHigh(performance::binding::BindLowToHighObjective), Self::BindHighToLow(performance::binding::BindHighToLowObjective), - Self::NaiveSortTime(crate::sort_targets::NaiveSortObjective), + Self::NaiveSortTime(performance::naive_sort::NaiveSortObjective), ] } @@ -209,7 +209,7 @@ pub use code_quality::halstead_bugs::HALSTEAD_BUGS; pub use code_quality::lloc::LLOC; pub use performance::binding::{BIND_HIGH_TO_LOW, BIND_LOW_TO_HIGH}; pub const NAIVE_SORT_TIME: OptimizationObjective = OptimizationObjective::Performance( - PerformanceObjective::NaiveSortTime(crate::sort_targets::NaiveSortObjective), + PerformanceObjective::NaiveSortTime(performance::naive_sort::NaiveSortObjective), ); impl OptimizationObjective { diff --git a/jolt-eval/src/objective/performance/mod.rs b/jolt-eval/src/objective/performance/mod.rs index 25ba3925c..f57c79b80 100644 --- a/jolt-eval/src/objective/performance/mod.rs +++ 
b/jolt-eval/src/objective/performance/mod.rs @@ -1,4 +1,5 @@ pub mod binding; +pub mod naive_sort; pub mod prover_time; use std::path::Path; diff --git a/jolt-eval/src/objective/performance/naive_sort.rs b/jolt-eval/src/objective/performance/naive_sort.rs new file mode 100644 index 000000000..9afb503ff --- /dev/null +++ b/jolt-eval/src/objective/performance/naive_sort.rs @@ -0,0 +1,32 @@ +use crate::objective::Objective; +use crate::sort_targets::naive_sort; + +const SORT_DATA_SIZE: usize = 5000; + +#[derive(Clone, Copy, Default)] +pub struct NaiveSortObjective; + +impl Objective for NaiveSortObjective { + type Setup = Vec; + + fn name(&self) -> &str { + "naive_sort_time" + } + + fn description(&self) -> &str { + "Wall-clock time of the naive_sort function in jolt-eval/src/sort_targets.rs" + } + + fn setup(&self) -> Vec { + (0..SORT_DATA_SIZE as i32).rev().collect() + } + + fn run(&self, mut setup: Vec) { + naive_sort(&mut setup); + std::hint::black_box(&setup); + } + + fn units(&self) -> Option<&str> { + Some("s") + } +} diff --git a/jolt-eval/src/sort_targets.rs b/jolt-eval/src/sort_targets.rs index ae735e5e8..153e7421a 100644 --- a/jolt-eval/src/sort_targets.rs +++ b/jolt-eval/src/sort_targets.rs @@ -1,7 +1,5 @@ //! Sorting functions used as targets for e2e optimization and red-team tests. -use crate::objective::Objective; - /// Naive bubble sort — the optimization target. /// Intentionally O(n²) so a "smarter" sort is measurably faster. 
pub fn naive_sort(data: &mut [i32]) { @@ -34,33 +32,3 @@ pub fn candidate_sort(data: &mut [i32]) { data[..last].sort(); } } - -const SORT_DATA_SIZE: usize = 5000; - -#[derive(Clone, Copy, Default)] -pub struct NaiveSortObjective; - -impl Objective for NaiveSortObjective { - type Setup = Vec; - - fn name(&self) -> &str { - "naive_sort_time" - } - - fn description(&self) -> &str { - "Wall-clock time of the naive_sort function in jolt-eval/src/sort_targets.rs" - } - - fn setup(&self) -> Vec { - (0..SORT_DATA_SIZE as i32).rev().collect() - } - - fn run(&self, mut setup: Vec) { - naive_sort(&mut setup); - std::hint::black_box(&setup); - } - - fn units(&self) -> Option<&str> { - Some("s") - } -} From ab555b5b34fdcd919d8980f64868cd3dd71badfd Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 14:52:01 -0400 Subject: [PATCH 77/86] fix(jolt-eval): correct limit names in soundness validation error message The error reported input/output/stack<=MAX_STACK_SIZE but input and output have their own limits (MAX_INPUT_SIZE, MAX_OUTPUT_SIZE). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/invariant/soundness.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jolt-eval/src/invariant/soundness.rs b/jolt-eval/src/invariant/soundness.rs index 3a1ab58b9..ea0bae86e 100644 --- a/jolt-eval/src/invariant/soundness.rs +++ b/jolt-eval/src/invariant/soundness.rs @@ -50,7 +50,7 @@ impl GuestMemoryConfig { return Err(CheckError::InvalidInput(format!( "memory config exceeds limits: \ input={}, output={}, stack={}, heap={}; \ - limits: input/output/stack<={MAX_STACK_SIZE}, heap<={MAX_HEAP_SIZE}", + limits: input<={MAX_INPUT_SIZE}, output<={MAX_OUTPUT_SIZE}, stack<={MAX_STACK_SIZE}, heap<={MAX_HEAP_SIZE}", self.max_input_size, self.max_output_size, self.stack_size, self.heap_size, ))); } From 9ac441230205330c53749848133e95004b3b005b Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 14:55:27 -0400 Subject: [PATCH 78/86] =?UTF-8?q?fix(jolt-eval):=20address=20PR=20review?= =?UTF-8?q?=20=E2=80=94=20docs,=20git=20clean,=20apply=5Fdiff=20stderr?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix ObjectiveFunction example: update evaluate closure signature to |m, _b| and add missing `normalized` to the use statement - Clarify that prover_time_* benchmarks are standalone Criterion targets not tracked by optimize/measure-objectives binaries - Update guest-sandbox/README to reflect actual behavior: hash guest, in-place patching with PatchGuard git-checkout revert - Add git clean -fd after git checkout . 
in both rejection branches of auto_optimize so agent-added untracked files don't contaminate subsequent iterations - Capture stderr in apply_diff and include it in the AgentError message Co-Authored-By: Claude Sonnet 4.6 --- jolt-eval/README.md | 6 ++++-- jolt-eval/src/agent/mod.rs | 10 ++++++---- jolt-eval/src/objective/optimize.rs | 8 ++++++++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/jolt-eval/README.md b/jolt-eval/README.md index 31f3b4069..062ebce7c 100644 --- a/jolt-eval/README.md +++ b/jolt-eval/README.md @@ -47,6 +47,8 @@ The motivation is twofold: | `prover_time_sha2_chain_100` | End-to-end prover time for 100 iterations of SHA-256 chain | | `prover_time_secp256k1_ecdsa_verify` | End-to-end prover time for secp256k1 ECDSA signature verification | +Note: `prover_time_*` benchmarks are standalone Criterion bench targets (run via `cargo bench -p jolt-eval --bench <name>`). They are **not** included in `PerformanceObjective::all()` and are not tracked by the `optimize` or `measure-objectives` binaries. 
+ ### Objective functions | Name | Inputs | Description | @@ -61,12 +63,12 @@ Custom composite objective functions can be defined as `ObjectiveFunction` struc ```rust use jolt_eval::objective::objective_fn::ObjectiveFunction; -use jolt_eval::objective::{LLOC, HALSTEAD_BUGS}; +use jolt_eval::objective::{normalized, LLOC, HALSTEAD_BUGS}; const WEIGHTED_QUALITY: ObjectiveFunction = ObjectiveFunction { name: "weighted_quality", inputs: &[LLOC, HALSTEAD_BUGS], - evaluate: |m| { + evaluate: |m, _b| { 2.0 * m.get(&LLOC).unwrap_or(&0.0) + m.get(&HALSTEAD_BUGS).unwrap_or(&0.0) }, }; diff --git a/jolt-eval/src/agent/mod.rs b/jolt-eval/src/agent/mod.rs index 4d3fc377b..13a935d4e 100644 --- a/jolt-eval/src/agent/mod.rs +++ b/jolt-eval/src/agent/mod.rs @@ -87,6 +87,7 @@ pub fn apply_diff(repo_dir: &Path, diff: &str) -> Result<(), AgentError> { .current_dir(repo_dir) .args(["apply", "--allow-empty"]) .stdin(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) .spawn() .map_err(|e| AgentError::new(format!("git apply spawn: {e}")))?; @@ -95,12 +96,13 @@ pub fn apply_diff(repo_dir: &Path, diff: &str) -> Result<(), AgentError> { let _ = stdin.write_all(diff.as_bytes()); } - let status = child - .wait() + let output = child + .wait_with_output() .map_err(|e| AgentError::new(format!("git apply wait: {e}")))?; - if !status.success() { - return Err(AgentError::new("git apply failed")); + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(AgentError::new(format!("git apply failed: {stderr}"))); } Ok(()) } diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index 13a8693f4..6f3fb55b9 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -170,6 +170,10 @@ pub fn auto_optimize( .current_dir(repo_dir) .args(["checkout", "."]) .status(); + let _ = Command::new("git") + .current_dir(repo_dir) + .args(["clean", "-fd"]) + .status(); } else { eprintln!( " ✗ 
iteration {iter} REJECTED (no improvement) — score {new_score:.10} ≥ best {best_score:.10}", @@ -179,6 +183,10 @@ pub fn auto_optimize( .current_dir(repo_dir) .args(["checkout", "."]) .status(); + let _ = Command::new("git") + .current_dir(repo_dir) + .args(["clean", "-fd"]) + .status(); } } From 625fe804f98fd850b209a344d1baddcccf72bd7b Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 14:55:50 -0400 Subject: [PATCH 79/86] fix(jolt-eval): update guest-sandbox README to reflect actual behavior The harness applies patches in-place with a PatchGuard RAII guard (not copying to a temp dir), and the default guest is a wrapping hash (not an identity function). Co-Authored-By: Claude Sonnet 4.6 --- jolt-eval/guest-sandbox/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jolt-eval/guest-sandbox/README.md b/jolt-eval/guest-sandbox/README.md index 6e028fa3e..82f065f96 100644 --- a/jolt-eval/guest-sandbox/README.md +++ b/jolt-eval/guest-sandbox/README.md @@ -2,9 +2,9 @@ Template guest program for the soundness invariant's red-team harness. -During a red-team session (`cargo run --bin redteam -- --invariant soundness`), an AI agent produces a **unified diff** against this directory. The harness copies the template to a temp directory, applies the patch, compiles the patched guest with `jolt build`, then proves execution and checks that the verifier rejects any dishonest output/panic claim. +During a red-team session (`cargo run --bin redteam -- --invariant soundness`), an AI agent produces a **unified diff** against this directory. The harness applies the patch in-place via `git apply`, then compiles the patched guest with `jolt build`, proves execution, and checks that the verifier rejects any dishonest output/panic claim. A `PatchGuard` RAII guard reverts the changes (via `git checkout .`) on drop, even if the check panics. -The default guest is an identity function (`input → input`). 
The agent's goal is to patch it into a program that exposes a soundness bug in Jolt — i.e. one where the verifier accepts a proof paired with an incorrect output or panic flag. +The default guest computes a simple wrapping hash of the input bytes (`h = 0; for b in input { h = h * 31 + b }`). The agent's goal is to patch it into a program that exposes a soundness bug in Jolt — i.e. one where the verifier accepts a proof paired with an incorrect output or panic flag. ## Structure From f828a7d340f3ea4fcea6b9202f3100686327c2fd Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 16:29:32 -0400 Subject: [PATCH 80/86] feat(jolt-eval): persist optimize attempts to gitignored history directory Each auto-optimize iteration now writes diff.patch, response.md, measurements.json, and status.json to jolt-eval/optimize-history/{objective}/attempt-{N}/. The baseline is persisted to jolt-eval/optimize-history/{objective}/baseline/. The prompt's previous-attempts section is slimmed down to one line per attempt pointing at the directory. Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 1 + jolt-eval/src/objective/optimize.rs | 144 +++++++++++++++++++++++----- 2 files changed, 122 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index 77ea8883c..ed8471a7f 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,4 @@ benchmark-runs/ *.pb *benchmark_results.json .omc/ +optimize-history/ diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index 6f3fb55b9..d088f5998 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -39,6 +39,8 @@ pub struct OptimizationAttempt { pub measurements: HashMap, pub score: f64, pub invariants_passed: bool, + /// Relative path to the persisted attempt directory, if available. + pub path: Option, } /// Environment trait that decouples the optimization loop from side effects. 
@@ -59,6 +61,90 @@ pub trait OptimizeEnv { fn reject(&mut self); } +#[allow(clippy::too_many_arguments)] +fn write_attempt_files( + dir: &Path, + diff: &str, + response_text: &str, + measurements: &HashMap, + score: f64, + accepted: bool, + invariants_passed: bool, +) -> Option<()> { + std::fs::write(dir.join("diff.patch"), diff).ok()?; + std::fs::write(dir.join("response.md"), response_text).ok()?; + + let meas: HashMap = measurements + .iter() + .map(|(k, &v)| (k.name().to_string(), v)) + .collect(); + let meas_json = serde_json::to_string_pretty(&meas).ok()?; + std::fs::write(dir.join("measurements.json"), meas_json).ok()?; + + let status = serde_json::json!({ + "accepted": accepted, + "score": score, + "invariants_passed": invariants_passed, + }); + std::fs::write( + dir.join("status.json"), + serde_json::to_string_pretty(&status).ok()?, + ) + .ok()?; + + Some(()) +} + +#[allow(clippy::too_many_arguments)] +fn persist_attempt( + repo_dir: &Path, + objective_name: &str, + iteration: usize, + diff: &str, + response_text: &str, + measurements: &HashMap, + score: f64, + accepted: bool, + invariants_passed: bool, +) -> Option { + let dir = repo_dir + .join("jolt-eval/optimize-history") + .join(objective_name) + .join(format!("attempt-{iteration}")); + std::fs::create_dir_all(&dir).ok()?; + write_attempt_files( + &dir, + diff, + response_text, + measurements, + score, + accepted, + invariants_passed, + )?; + Some( + dir.strip_prefix(repo_dir) + .ok()? + .to_string_lossy() + .to_string(), + ) +} + +fn persist_baseline( + repo_dir: &Path, + objective_name: &str, + measurements: &HashMap, + score: f64, +) { + let dir = repo_dir + .join("jolt-eval/optimize-history") + .join(objective_name) + .join("baseline"); + if std::fs::create_dir_all(&dir).is_err() { + return; + } + let _ = write_attempt_files(&dir, "", "", measurements, score, true, true); +} + /// Run an AI-driven optimization loop. /// /// The agent tries to minimize `objective.evaluate(measurements)`. 
@@ -80,6 +166,7 @@ pub fn auto_optimize( let baseline = env.measure(); let baseline_score = (objective.evaluate)(&baseline, &baseline); + persist_baseline(repo_dir, objective.name, &baseline, baseline_score); let mut best_score = baseline_score; let mut best_measurements = baseline.clone(); let mut attempts = Vec::new(); @@ -119,6 +206,7 @@ pub fn auto_optimize( eprintln!("──────────────────────────"); } + let response_text = response.text.clone(); let diff_text = match &response.diff { Some(d) => { env.apply_diff(d); @@ -135,17 +223,30 @@ pub fn auto_optimize( let invariants_passed = env.check_invariants(); let improved = invariants_passed && new_score < best_score; + let iter = iteration + 1; + + let attempt_path = persist_attempt( + repo_dir, + objective.name, + iter, + &diff_text, + &response_text, + &new_measurements, + new_score, + improved, + invariants_passed, + ); let attempt = OptimizationAttempt { - description: format!("iteration {}", iteration + 1), + description: format!("iteration {iter}"), diff: truncate(&diff_text, 5000).to_string(), measurements: new_measurements.clone(), score: new_score, invariants_passed, + path: attempt_path, }; attempts.push(attempt); - let iter = iteration + 1; if improved { eprintln!(" ✓ iteration {iter} ACCEPTED — score {best_score:.10} → {new_score:.10}",); best_score = new_score; @@ -282,34 +383,31 @@ fn build_optimize_prompt( if !past_attempts.is_empty() { prompt.push_str("## Previous attempts\n\n"); for attempt in past_attempts { - let status = if attempt.invariants_passed { - "invariants passed" - } else { - "INVARIANTS FAILED" + let status_label = match ( + attempt.invariants_passed, + attempt.score < current_best_score, + ) { + (true, true) => "ACCEPTED", + (false, _) => "REJECTED (invariants failed)", + _ => "REJECTED (no improvement)", }; - prompt.push_str(&format!( - "- **{}** ({}, score={:.6}): ", - attempt.description, status, attempt.score - )); - let mut keys: Vec<_> = attempt.measurements.iter().collect(); - 
keys.sort_by_key(|(k, _)| k.name()); - for (key, val) in keys { - prompt.push_str(&format!("{}={val:.6} ", key.name())); - } - prompt.push('\n'); - if !attempt.diff.is_empty() { + if let Some(ref path) = attempt.path { prompt.push_str(&format!( - " Diff preview: `{}`\n", - truncate(&attempt.diff, 500) + "- **{}** — {status_label}, score={:.6}. Details: {path}/\n", + attempt.description, attempt.score, + )); + } else { + prompt.push_str(&format!( + "- **{}** — {status_label}, score={:.6}\n", + attempt.description, attempt.score, )); } } prompt.push('\n'); - prompt.push_str( - "If previous attempts failed or showed no improvement, try a fundamentally \ - different approach. Analyze WHY the previous approach did not reduce the score \ - and pivot to a new strategy.\n\n", + "Read the attempt directories for full diffs, measurements, and agent responses.\n\ + If previous attempts failed or showed no improvement, try a fundamentally \ + different approach.\n\n", ); } From ff5a6a716beff0646f14694ce7b4f3b40e990277 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 16:35:24 -0400 Subject: [PATCH 81/86] feat(jolt-eval): persist redteam attempts to gitignored history directory Mirror the optimize-history pattern for the redteam loop: - Write approach.md and failure_reason.txt to jolt-eval/redteam-history/{invariant}/attempt-{N}/ - Add path: Option to FailedAttempt - Slim the prompt to show description + failure reason inline, with a pointer to the attempt directory for the full approach - Add redteam-history/ to .gitignore Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 1 + jolt-eval/src/invariant/mod.rs | 2 + jolt-eval/src/invariant/synthesis/redteam.rs | 102 ++++++++++++++----- 3 files changed, 82 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index ed8471a7f..79261ef2b 100644 --- a/.gitignore +++ b/.gitignore @@ -59,3 +59,4 @@ benchmark-runs/ *benchmark_results.json .omc/ optimize-history/ +redteam-history/ diff --git 
a/jolt-eval/src/invariant/mod.rs b/jolt-eval/src/invariant/mod.rs index 3d8674f4b..2117c5490 100644 --- a/jolt-eval/src/invariant/mod.rs +++ b/jolt-eval/src/invariant/mod.rs @@ -218,6 +218,8 @@ pub struct FailedAttempt { pub description: String, pub approach: String, pub failure_reason: String, + /// Path to the persisted attempt directory (relative to repo root). + pub path: Option, } /// Try to extract a JSON object from free-form text. Looks for a diff --git a/jolt-eval/src/invariant/synthesis/redteam.rs b/jolt-eval/src/invariant/synthesis/redteam.rs index 2699e2ed6..51fcc841f 100644 --- a/jolt-eval/src/invariant/synthesis/redteam.rs +++ b/jolt-eval/src/invariant/synthesis/redteam.rs @@ -1,7 +1,7 @@ use std::path::Path; use super::super::{CheckError, FailedAttempt, Invariant}; -use crate::agent::{truncate, AgentHarness, DiffScope}; +use crate::agent::{AgentHarness, DiffScope}; /// Result of a red-team session. pub enum RedTeamResult { @@ -51,9 +51,9 @@ pub fn auto_redteam( let mut failed_attempts = Vec::new(); for iteration in 0..config.num_iterations { + let iter = iteration + 1; tracing::info!( - "Red team iteration {}/{} for '{}'", - iteration + 1, + "Red team iteration {iter}/{} for '{}'", config.num_iterations, invariant.name() ); @@ -64,12 +64,12 @@ pub fn auto_redteam( &input_schema, config.hint.as_deref(), &failed_attempts, - iteration + 1, + iter, config.num_iterations, ); if config.verbose { - eprintln!("── Iteration {} prompt ──", iteration + 1); + eprintln!("── Iteration {iter} prompt ──"); eprintln!("{prompt}"); eprintln!("────────────────────────"); } @@ -80,17 +80,25 @@ pub fn auto_redteam( Ok(r) => r, Err(e) => { tracing::warn!("Agent invocation failed: {e}"); + let path = persist_redteam_attempt( + repo_dir, + invariant.name(), + iter, + "Agent invocation failed", + &e.to_string(), + ); failed_attempts.push(FailedAttempt { - description: format!("Iteration {}", iteration + 1), + description: format!("Iteration {iter}"), approach: "Agent 
invocation failed".to_string(), failure_reason: e.to_string(), + path, }); continue; } }; if config.verbose { - eprintln!("── Iteration {} response ──", iteration + 1); + eprintln!("── Iteration {iter} response ──"); eprintln!("{}", response.text); if let Some(ref d) = response.diff { eprintln!("── diff ({} bytes) ──", d.len()); @@ -107,10 +115,19 @@ pub fn auto_redteam( None => (response.text.clone(), json), }, None => { + let failure = "Agent response did not contain valid JSON".to_string(); + let path = persist_redteam_attempt( + repo_dir, + invariant.name(), + iter, + &response.text, + &failure, + ); failed_attempts.push(FailedAttempt { - description: format!("Iteration {}", iteration + 1), + description: format!("Iteration {iter}"), approach: response.text, - failure_reason: "Agent response did not contain valid JSON".to_string(), + failure_reason: failure, + path, }); continue; } @@ -121,12 +138,14 @@ pub fn auto_redteam( Ok(v) => v, Err(e) => { tracing::info!("Agent produced unparsable input: {e}"); + let failure = format!("Could not deserialize response JSON into Input type: {e}"); + let path = + persist_redteam_attempt(repo_dir, invariant.name(), iter, &analysis, &failure); failed_attempts.push(FailedAttempt { - description: format!("Iteration {}", iteration + 1), + description: format!("Iteration {iter}"), approach: analysis, - failure_reason: format!( - "Could not deserialize response JSON into Input type: {e}" - ), + failure_reason: failure, + path, }); continue; } @@ -138,12 +157,15 @@ pub fn auto_redteam( match invariant.check(&setup, input) { Ok(()) => { + let failure = + format!("Candidate input did not violate the invariant: {counterexample_json}"); + let path = + persist_redteam_attempt(repo_dir, invariant.name(), iter, &analysis, &failure); failed_attempts.push(FailedAttempt { - description: format!("Iteration {}", iteration + 1), + description: format!("Iteration {iter}"), approach: analysis, - failure_reason: format!( - "Candidate input did not 
violate the invariant: {counterexample_json}" - ), + failure_reason: failure, + path, }); } Err(CheckError::Violation(violation)) => { @@ -155,10 +177,14 @@ pub fn auto_redteam( }; } Err(CheckError::InvalidInput(reason)) => { + let failure = format!("Invalid input: {reason}"); + let path = + persist_redteam_attempt(repo_dir, invariant.name(), iter, &analysis, &failure); failed_attempts.push(FailedAttempt { - description: format!("Iteration {}", iteration + 1), + description: format!("Iteration {iter}"), approach: analysis, - failure_reason: format!("Invalid input: {reason}"), + failure_reason: failure, + path, }); } } @@ -169,6 +195,29 @@ pub fn auto_redteam( } } +/// Persist a red-team attempt's approach to disk and return the relative path. +fn persist_redteam_attempt( + repo_dir: &Path, + invariant_name: &str, + iteration: usize, + approach: &str, + failure_reason: &str, +) -> Option { + let dir = repo_dir + .join("jolt-eval/redteam-history") + .join(invariant_name) + .join(format!("attempt-{iteration}")); + std::fs::create_dir_all(&dir).ok()?; + std::fs::write(dir.join("approach.md"), approach).ok()?; + std::fs::write(dir.join("failure_reason.txt"), failure_reason).ok()?; + Some( + dir.strip_prefix(repo_dir) + .ok()? + .to_string_lossy() + .to_string(), + ) +} + fn build_envelope_schema(input_schema: &serde_json::Value) -> serde_json::Value { serde_json::json!({ "type": "object", @@ -253,13 +302,20 @@ fn build_redteam_prompt( valid counterexample.\n\n", ); for attempt in failed_attempts { - let approach_preview = truncate(&attempt.approach, 200); + let path_ref = attempt + .path + .as_deref() + .map(|p| format!(" Details: {p}/")) + .unwrap_or_default(); prompt.push_str(&format!( - "- **{}**: {}\n Failure: {}\n", - attempt.description, approach_preview, attempt.failure_reason + "- **{}** — {}{path_ref}\n", + attempt.description, attempt.failure_reason, )); } - prompt.push('\n'); + prompt.push_str( + "\nRead the attempt directories for the full agent approach. 
\ + Try a fundamentally different strategy.\n\n", + ); } prompt.push_str( From 3ae95319c9667240bc85af87f86bec64a58cc5b6 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 16:39:21 -0400 Subject: [PATCH 82/86] Clean up unnecessary fields in OptimizationAttempt --- jolt-eval/src/objective/optimize.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/jolt-eval/src/objective/optimize.rs b/jolt-eval/src/objective/optimize.rs index d088f5998..38a257faf 100644 --- a/jolt-eval/src/objective/optimize.rs +++ b/jolt-eval/src/objective/optimize.rs @@ -34,9 +34,7 @@ pub struct OptimizeResult { /// Record of a single optimization attempt. pub struct OptimizationAttempt { - pub description: String, - pub diff: String, - pub measurements: HashMap, + pub iteration: usize, pub score: f64, pub invariants_passed: bool, /// Relative path to the persisted attempt directory, if available. @@ -238,9 +236,7 @@ pub fn auto_optimize( ); let attempt = OptimizationAttempt { - description: format!("iteration {iter}"), - diff: truncate(&diff_text, 5000).to_string(), - measurements: new_measurements.clone(), + iteration: iter, score: new_score, invariants_passed, path: attempt_path, @@ -393,13 +389,13 @@ fn build_optimize_prompt( }; if let Some(ref path) = attempt.path { prompt.push_str(&format!( - "- **{}** — {status_label}, score={:.6}. Details: {path}/\n", - attempt.description, attempt.score, + "- **Iteration {}** — {status_label}, score={:.6}. 
Details: {path}/\n", + attempt.iteration, attempt.score, )); } else { prompt.push_str(&format!( - "- **{}** — {status_label}, score={:.6}\n", - attempt.description, attempt.score, + "- **Iteration {}** — {status_label}, score={:.6}\n", + attempt.iteration, attempt.score, )); } } From be7b3404f4d416d4d7f763f454426ed93c0e208c Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 16:59:06 -0400 Subject: [PATCH 83/86] refactor(jolt-eval): replace root: PathBuf with target_dir: &'static str Code quality objectives now store target_dir (e.g. "jolt-core/src") instead of a leaked root PathBuf. The repo root is computed at runtime via env!("CARGO_MANIFEST_DIR").parent(). This eliminates all Box::leak calls from the code quality objectives and removes the root parameter from StaticAnalysisObjective::all() and OptimizationObjective::all(). Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/bin/measure_objectives.rs | 4 +- jolt-eval/bin/optimize.rs | 2 +- jolt-eval/src/agent/tests.rs | 2 +- .../src/objective/code_quality/cognitive.rs | 24 ++++----- .../objective/code_quality/halstead_bugs.rs | 24 ++++----- jolt-eval/src/objective/code_quality/lloc.rs | 22 +++----- jolt-eval/src/objective/mod.rs | 50 ++++++++----------- 7 files changed, 51 insertions(+), 77 deletions(-) diff --git a/jolt-eval/bin/measure_objectives.rs b/jolt-eval/bin/measure_objectives.rs index c2c926166..10569abfd 100644 --- a/jolt-eval/bin/measure_objectives.rs +++ b/jolt-eval/bin/measure_objectives.rs @@ -29,8 +29,6 @@ fn main() -> eyre::Result<()> { tracing_subscriber::fmt::init(); let cli = Cli::parse(); - let repo_root = std::env::current_dir()?; - // Performance objectives (from Criterion) if !cli.no_bench { let perf = PerformanceObjective::all(); @@ -88,7 +86,7 @@ fn main() -> eyre::Result<()> { } // Static-analysis objectives - for sa in StaticAnalysisObjective::all(&repo_root) { + for sa in StaticAnalysisObjective::all() { if let Some(ref name) = cli.objective { if sa.name() != 
name.as_str() { continue; diff --git a/jolt-eval/bin/optimize.rs b/jolt-eval/bin/optimize.rs index 5dfa3832c..7be59914a 100644 --- a/jolt-eval/bin/optimize.rs +++ b/jolt-eval/bin/optimize.rs @@ -57,7 +57,7 @@ impl OptimizeEnv for RealEnv { fn measure(&mut self) -> HashMap { let mut results = HashMap::new(); - for sa in StaticAnalysisObjective::all(&self.repo_dir) { + for sa in StaticAnalysisObjective::all() { if let Ok(v) = sa.collect_measurement() { results.insert(OptimizationObjective::StaticAnalysis(sa), v); } diff --git a/jolt-eval/src/agent/tests.rs b/jolt-eval/src/agent/tests.rs index 32196e88c..5ebb6f79a 100644 --- a/jolt-eval/src/agent/tests.rs +++ b/jolt-eval/src/agent/tests.rs @@ -959,7 +959,7 @@ fn optimize_prompt_includes_past_attempts() { assert_eq!(prompts.len(), 2); assert!(!prompts[0].contains("Previous attempts")); assert!(prompts[1].contains("Previous attempts")); - assert!(prompts[1].contains("iteration 1")); + assert!(prompts[1].contains("Iteration 1")); } #[test] diff --git a/jolt-eval/src/objective/code_quality/cognitive.rs b/jolt-eval/src/objective/code_quality/cognitive.rs index 7b91ee1f2..4924f114b 100644 --- a/jolt-eval/src/objective/code_quality/cognitive.rs +++ b/jolt-eval/src/objective/code_quality/cognitive.rs @@ -8,22 +8,16 @@ use crate::objective::{ }; pub const COGNITIVE_COMPLEXITY: OptimizationObjective = OptimizationObjective::StaticAnalysis( - StaticAnalysisObjective::CognitiveComplexity(CognitiveComplexityObjective { root: "" }), + StaticAnalysisObjective::CognitiveComplexity(CognitiveComplexityObjective { + target_dir: "jolt-core/src", + }), ); /// Average cognitive complexity per function across all Rust files under -/// `jolt-core/src/`. +/// a target directory. 
#[derive(Clone, Copy)] pub struct CognitiveComplexityObjective { - pub(crate) root: &'static str, -} - -impl CognitiveComplexityObjective { - pub fn new(root: &Path) -> Self { - Self { - root: Box::leak(root.to_string_lossy().into_owned().into_boxed_str()), - } - } + pub(crate) target_dir: &'static str, } impl Objective for CognitiveComplexityObjective { @@ -40,7 +34,8 @@ impl Objective for CognitiveComplexityObjective { fn setup(&self) {} fn collect_measurement(&self) -> Result { - let src_dir = std::path::PathBuf::from(self.root).join("jolt-core/src"); + let repo_root = Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap(); + let src_dir = repo_root.join(self.target_dir); let mut total = 0.0; let mut count = 0usize; for path in rust_files(&src_dir)? { @@ -75,8 +70,9 @@ mod tests { #[test] fn cognitive_on_jolt_core() { - let root = Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap(); - let obj = CognitiveComplexityObjective::new(root); + let obj = CognitiveComplexityObjective { + target_dir: "jolt-core/src", + }; let val = obj.collect_measurement().unwrap(); assert!(val > 0.0, "avg cognitive should be > 0, got {val}"); assert!(val < 100.0, "avg cognitive should be < 100, got {val}"); diff --git a/jolt-eval/src/objective/code_quality/halstead_bugs.rs b/jolt-eval/src/objective/code_quality/halstead_bugs.rs index 408efb84b..a4da785df 100644 --- a/jolt-eval/src/objective/code_quality/halstead_bugs.rs +++ b/jolt-eval/src/objective/code_quality/halstead_bugs.rs @@ -8,23 +8,17 @@ use crate::objective::{ }; pub const HALSTEAD_BUGS: OptimizationObjective = OptimizationObjective::StaticAnalysis( - StaticAnalysisObjective::HalsteadBugs(HalsteadBugsObjective { root: "" }), + StaticAnalysisObjective::HalsteadBugs(HalsteadBugsObjective { + target_dir: "jolt-core/src", + }), ); /// Estimated number of delivered bugs across all Rust files under -/// `jolt-core/src/`, based on Halstead's bug prediction formula +/// a target directory, based on Halstead's bug prediction 
formula /// (B = V / 3000, where V is program volume). #[derive(Clone, Copy)] pub struct HalsteadBugsObjective { - pub(crate) root: &'static str, -} - -impl HalsteadBugsObjective { - pub fn new(root: &Path) -> Self { - Self { - root: Box::leak(root.to_string_lossy().into_owned().into_boxed_str()), - } - } + pub(crate) target_dir: &'static str, } impl Objective for HalsteadBugsObjective { @@ -41,7 +35,8 @@ impl Objective for HalsteadBugsObjective { fn setup(&self) {} fn collect_measurement(&self) -> Result { - let src_dir = std::path::PathBuf::from(self.root).join("jolt-core/src"); + let repo_root = Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap(); + let src_dir = repo_root.join(self.target_dir); let mut total = 0.0; for path in rust_files(&src_dir)? { if let Some(space) = analyze_rust_file(&path) { @@ -67,8 +62,9 @@ mod tests { #[test] fn halstead_bugs_on_jolt_core() { - let root = Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap(); - let obj = HalsteadBugsObjective::new(root); + let obj = HalsteadBugsObjective { + target_dir: "jolt-core/src", + }; let val = obj.collect_measurement().unwrap(); assert!(val > 0.0, "halstead bugs should be > 0, got {val}"); } diff --git a/jolt-eval/src/objective/code_quality/lloc.rs b/jolt-eval/src/objective/code_quality/lloc.rs index 5545c2bd2..e84bc0f81 100644 --- a/jolt-eval/src/objective/code_quality/lloc.rs +++ b/jolt-eval/src/objective/code_quality/lloc.rs @@ -8,22 +8,14 @@ use crate::objective::{ pub const LLOC: OptimizationObjective = OptimizationObjective::StaticAnalysis(StaticAnalysisObjective::Lloc(LlocObjective { - root: "", + target_dir: "jolt-core/src", })); /// Total logical lines of code (LLOC) across all Rust files under -/// `jolt-core/src/`. +/// a target directory. 
#[derive(Clone, Copy)] pub struct LlocObjective { - pub(crate) root: &'static str, -} - -impl LlocObjective { - pub fn new(root: &Path) -> Self { - Self { - root: Box::leak(root.to_string_lossy().into_owned().into_boxed_str()), - } - } + pub(crate) target_dir: &'static str, } impl Objective for LlocObjective { @@ -40,7 +32,8 @@ impl Objective for LlocObjective { fn setup(&self) {} fn collect_measurement(&self) -> Result { - let src_dir = PathBuf::from(self.root).join("jolt-core/src"); + let repo_root = Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap(); + let src_dir = repo_root.join(self.target_dir); let mut total = 0.0; for path in rust_files(&src_dir)? { if let Some(space) = analyze_rust_file(&path) { @@ -89,8 +82,9 @@ mod tests { #[test] fn lloc_on_jolt_core() { - let root = Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap(); - let obj = LlocObjective::new(root); + let obj = LlocObjective { + target_dir: "jolt-core/src", + }; let val = obj.collect_measurement().unwrap(); assert!(val > 1000.0, "LLOC should be > 1000, got {val}"); } diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index 5032dcf23..d20dcf85c 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -6,7 +6,6 @@ pub mod synthesis; use std::fmt; use std::hash::{Hash, Hasher}; -use std::path::Path; /// Error during objective measurement. 
#[derive(Debug, Clone)] @@ -74,15 +73,17 @@ pub enum StaticAnalysisObjective { } impl StaticAnalysisObjective { - pub fn all(root: &Path) -> Vec { + pub fn all() -> Vec { vec![ - Self::Lloc(code_quality::lloc::LlocObjective::new(root)), - Self::CognitiveComplexity(code_quality::cognitive::CognitiveComplexityObjective::new( - root, - )), - Self::HalsteadBugs(code_quality::halstead_bugs::HalsteadBugsObjective::new( - root, - )), + Self::Lloc(code_quality::lloc::LlocObjective { + target_dir: "jolt-core/src", + }), + Self::CognitiveComplexity(code_quality::cognitive::CognitiveComplexityObjective { + target_dir: "jolt-core/src", + }), + Self::HalsteadBugs(code_quality::halstead_bugs::HalsteadBugsObjective { + target_dir: "jolt-core/src", + }), ] } @@ -213,9 +214,9 @@ pub const NAIVE_SORT_TIME: OptimizationObjective = OptimizationObjective::Perfor ); impl OptimizationObjective { - pub fn all(root: &Path) -> Vec { + pub fn all() -> Vec { let mut all = Vec::new(); - for s in StaticAnalysisObjective::all(root) { + for s in StaticAnalysisObjective::all() { all.push(Self::StaticAnalysis(s)); } for p in PerformanceObjective::all() { @@ -331,10 +332,7 @@ mod tests { #[test] fn static_analysis_all_measures() { - let root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap(); - for sa in StaticAnalysisObjective::all(root) { + for sa in StaticAnalysisObjective::all() { let val = sa.collect_measurement().unwrap(); assert!(val > 0.0, "{} should be > 0, got {val}", sa.name()); } @@ -343,33 +341,25 @@ mod tests { #[test] fn optimization_objective_hashmap_key() { use std::collections::HashMap; - let root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap(); - let lloc = OptimizationObjective::StaticAnalysis(StaticAnalysisObjective::Lloc( - code_quality::lloc::LlocObjective::new(root), - )); - let bind = OptimizationObjective::Performance(PerformanceObjective::BindLowToHigh( - performance::binding::BindLowToHighObjective, - )); + let lloc = 
LLOC; + let bind = BIND_LOW_TO_HIGH; let mut m = HashMap::new(); m.insert(lloc, 100.0); m.insert(bind, 0.5); // Look up with a freshly constructed key — works because Hash/Eq - // is discriminant-based. + // is discriminant-based, inner data doesn't matter. let lloc2 = OptimizationObjective::StaticAnalysis(StaticAnalysisObjective::Lloc( - code_quality::lloc::LlocObjective::new(Path::new("/other")), + code_quality::lloc::LlocObjective { + target_dir: "other/path", + }, )); assert_eq!(m[&lloc2], 100.0); } #[test] fn optimization_objective_all() { - let root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap(); - let all = OptimizationObjective::all(root); + let all = OptimizationObjective::all(); assert_eq!(all.len(), 6); // 3 static + 3 perf assert!(all.iter().any(|o| o.is_perf())); assert!(all.iter().any(|o| !o.is_perf())); From bef916d803e646f83e812c4cd8086e74563486b9 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 17:08:49 -0400 Subject: [PATCH 84/86] refactor(jolt-eval): Objective::description returns String Allows computed descriptions that include target_dir, e.g.: "Total logical lines of code in jolt-core/src/" Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/objective/code_quality/cognitive.rs | 7 +++++-- jolt-eval/src/objective/code_quality/halstead_bugs.rs | 7 +++++-- jolt-eval/src/objective/code_quality/lloc.rs | 4 ++-- jolt-eval/src/objective/mod.rs | 10 +++++----- jolt-eval/src/objective/performance/binding.rs | 8 ++++---- jolt-eval/src/objective/performance/naive_sort.rs | 4 ++-- 6 files changed, 23 insertions(+), 17 deletions(-) diff --git a/jolt-eval/src/objective/code_quality/cognitive.rs b/jolt-eval/src/objective/code_quality/cognitive.rs index 4924f114b..582123960 100644 --- a/jolt-eval/src/objective/code_quality/cognitive.rs +++ b/jolt-eval/src/objective/code_quality/cognitive.rs @@ -27,8 +27,11 @@ impl Objective for CognitiveComplexityObjective { "cognitive_complexity_avg" } - fn 
description(&self) -> &str { - "Average cognitive complexity per function in jolt-core/src/" + fn description(&self) -> String { + format!( + "Average cognitive complexity per function in {}", + self.target_dir + ) } fn setup(&self) {} diff --git a/jolt-eval/src/objective/code_quality/halstead_bugs.rs b/jolt-eval/src/objective/code_quality/halstead_bugs.rs index a4da785df..8d38cc609 100644 --- a/jolt-eval/src/objective/code_quality/halstead_bugs.rs +++ b/jolt-eval/src/objective/code_quality/halstead_bugs.rs @@ -28,8 +28,11 @@ impl Objective for HalsteadBugsObjective { "halstead_bugs" } - fn description(&self) -> &str { - "Estimated delivered bugs (Halstead volume / 3000) in jolt-core/src/" + fn description(&self) -> String { + format!( + "Estimated delivered bugs (Halstead volume / 3000) in {}", + self.target_dir + ) } fn setup(&self) {} diff --git a/jolt-eval/src/objective/code_quality/lloc.rs b/jolt-eval/src/objective/code_quality/lloc.rs index e84bc0f81..e291f68ee 100644 --- a/jolt-eval/src/objective/code_quality/lloc.rs +++ b/jolt-eval/src/objective/code_quality/lloc.rs @@ -25,8 +25,8 @@ impl Objective for LlocObjective { "lloc" } - fn description(&self) -> &str { - "Total logical lines of code in jolt-core/src/" + fn description(&self) -> String { + format!("Total logical lines of code in {}", self.target_dir) } fn setup(&self) {} diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index d20dcf85c..bced975b1 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -42,8 +42,8 @@ pub trait Objective: Send + Sync { fn name(&self) -> &str; - fn description(&self) -> &str { - self.name() + fn description(&self) -> String { + self.name().to_string() } fn units(&self) -> Option<&str> { @@ -95,7 +95,7 @@ impl StaticAnalysisObjective { } } - pub fn description(&self) -> &str { + pub fn description(&self) -> String { match self { Self::Lloc(o) => o.description(), Self::CognitiveComplexity(o) => o.description(), @@ 
-169,7 +169,7 @@ impl PerformanceObjective { } } - pub fn description(&self) -> &str { + pub fn description(&self) -> String { match self { Self::BindLowToHigh(o) => o.description(), Self::BindHighToLow(o) => o.description(), @@ -239,7 +239,7 @@ impl OptimizationObjective { } } - pub fn description(&self) -> &str { + pub fn description(&self) -> String { match self { Self::StaticAnalysis(s) => s.description(), Self::Performance(p) => p.description(), diff --git a/jolt-eval/src/objective/performance/binding.rs b/jolt-eval/src/objective/performance/binding.rs index bcfcdaaba..8635fab49 100644 --- a/jolt-eval/src/objective/performance/binding.rs +++ b/jolt-eval/src/objective/performance/binding.rs @@ -56,8 +56,8 @@ impl Objective for BindLowToHighObjective { Self::NAME } - fn description(&self) -> &str { - "Wall-clock time of DensePolynomial::bind_parallel with LowToHigh binding (2^20 evaluations)" + fn description(&self) -> String { + "Wall-clock time of DensePolynomial::bind_parallel with LowToHigh binding (2^20 evaluations)".to_string() } fn setup(&self) -> BindSetup { @@ -94,8 +94,8 @@ impl Objective for BindHighToLowObjective { Self::NAME } - fn description(&self) -> &str { - "Wall-clock time of DensePolynomial::bind_parallel with HighToLow binding (2^20 evaluations)" + fn description(&self) -> String { + "Wall-clock time of DensePolynomial::bind_parallel with HighToLow binding (2^20 evaluations)".to_string() } fn setup(&self) -> BindSetup { diff --git a/jolt-eval/src/objective/performance/naive_sort.rs b/jolt-eval/src/objective/performance/naive_sort.rs index 9afb503ff..9bf318028 100644 --- a/jolt-eval/src/objective/performance/naive_sort.rs +++ b/jolt-eval/src/objective/performance/naive_sort.rs @@ -13,8 +13,8 @@ impl Objective for NaiveSortObjective { "naive_sort_time" } - fn description(&self) -> &str { - "Wall-clock time of the naive_sort function in jolt-eval/src/sort_targets.rs" + fn description(&self) -> String { + "Wall-clock time of the naive_sort 
function in jolt-eval/src/sort_targets.rs".to_string() } fn setup(&self) -> Vec { From ef9887cb8fc10d128563ca764205cd9e53da586c Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 17:16:02 -0400 Subject: [PATCH 85/86] Manual cleanup --- .../src/objective/code_quality/cognitive.rs | 2 +- .../objective/code_quality/halstead_bugs.rs | 2 +- jolt-eval/src/objective/code_quality/lloc.rs | 2 +- jolt-eval/src/objective/mod.rs | 28 ++----------------- .../src/objective/performance/binding.rs | 16 +++-------- .../src/objective/performance/naive_sort.rs | 2 +- .../src/objective/performance/prover_time.rs | 9 +++--- 7 files changed, 14 insertions(+), 47 deletions(-) diff --git a/jolt-eval/src/objective/code_quality/cognitive.rs b/jolt-eval/src/objective/code_quality/cognitive.rs index 582123960..1d7ae44a1 100644 --- a/jolt-eval/src/objective/code_quality/cognitive.rs +++ b/jolt-eval/src/objective/code_quality/cognitive.rs @@ -15,7 +15,7 @@ pub const COGNITIVE_COMPLEXITY: OptimizationObjective = OptimizationObjective::S /// Average cognitive complexity per function across all Rust files under /// a target directory. -#[derive(Clone, Copy)] +#[derive(Clone, Copy, PartialEq, Hash)] pub struct CognitiveComplexityObjective { pub(crate) target_dir: &'static str, } diff --git a/jolt-eval/src/objective/code_quality/halstead_bugs.rs b/jolt-eval/src/objective/code_quality/halstead_bugs.rs index 8d38cc609..e55930d1a 100644 --- a/jolt-eval/src/objective/code_quality/halstead_bugs.rs +++ b/jolt-eval/src/objective/code_quality/halstead_bugs.rs @@ -16,7 +16,7 @@ pub const HALSTEAD_BUGS: OptimizationObjective = OptimizationObjective::StaticAn /// Estimated number of delivered bugs across all Rust files under /// a target directory, based on Halstead's bug prediction formula /// (B = V / 3000, where V is program volume). 
-#[derive(Clone, Copy)] +#[derive(Clone, Copy, PartialEq, Hash)] pub struct HalsteadBugsObjective { pub(crate) target_dir: &'static str, } diff --git a/jolt-eval/src/objective/code_quality/lloc.rs b/jolt-eval/src/objective/code_quality/lloc.rs index e291f68ee..a4f86bafb 100644 --- a/jolt-eval/src/objective/code_quality/lloc.rs +++ b/jolt-eval/src/objective/code_quality/lloc.rs @@ -13,7 +13,7 @@ pub const LLOC: OptimizationObjective = /// Total logical lines of code (LLOC) across all Rust files under /// a target directory. -#[derive(Clone, Copy)] +#[derive(Clone, Copy, PartialEq, Hash)] pub struct LlocObjective { pub(crate) target_dir: &'static str, } diff --git a/jolt-eval/src/objective/mod.rs b/jolt-eval/src/objective/mod.rs index bced975b1..22cd9fc8e 100644 --- a/jolt-eval/src/objective/mod.rs +++ b/jolt-eval/src/objective/mod.rs @@ -65,7 +65,7 @@ pub trait Objective: Send + Sync { // Data-containing enums — Hash/Eq based on discriminant only /// Static-analysis objectives. -#[derive(Clone, Copy)] +#[derive(Clone, Copy, PartialEq, Hash)] pub enum StaticAnalysisObjective { Lloc(code_quality::lloc::LlocObjective), CognitiveComplexity(code_quality::cognitive::CognitiveComplexityObjective), @@ -124,20 +124,8 @@ impl StaticAnalysisObjective { } } -impl PartialEq for StaticAnalysisObjective { - fn eq(&self, other: &Self) -> bool { - std::mem::discriminant(self) == std::mem::discriminant(other) - } -} -impl Eq for StaticAnalysisObjective {} -impl Hash for StaticAnalysisObjective { - fn hash(&self, state: &mut H) { - std::mem::discriminant(self).hash(state); - } -} - /// Criterion-benchmarked performance objectives. 
-#[derive(Clone, Copy)] +#[derive(Clone, Copy, PartialEq, Hash)] pub enum PerformanceObjective { BindLowToHigh(performance::binding::BindLowToHighObjective), BindHighToLow(performance::binding::BindHighToLowObjective), @@ -185,18 +173,6 @@ impl PerformanceObjective { } } -impl PartialEq for PerformanceObjective { - fn eq(&self, other: &Self) -> bool { - std::mem::discriminant(self) == std::mem::discriminant(other) - } -} -impl Eq for PerformanceObjective {} -impl Hash for PerformanceObjective { - fn hash(&self, state: &mut H) { - std::mem::discriminant(self).hash(state); - } -} - /// Union of all known objectives — used as a type-safe HashMap key. #[derive(Clone, Copy)] pub enum OptimizationObjective { diff --git a/jolt-eval/src/objective/performance/binding.rs b/jolt-eval/src/objective/performance/binding.rs index 8635fab49..f7c43963a 100644 --- a/jolt-eval/src/objective/performance/binding.rs +++ b/jolt-eval/src/objective/performance/binding.rs @@ -42,18 +42,14 @@ impl BindShared { } /// Benchmark `DensePolynomial::bind_parallel` with `LowToHigh` binding. -#[derive(Clone, Copy, Default)] +#[derive(Clone, Copy, PartialEq, Hash)] pub struct BindLowToHighObjective; -impl BindLowToHighObjective { - pub const NAME: &str = "bind_parallel_low_to_high"; -} - impl Objective for BindLowToHighObjective { type Setup = BindSetup; fn name(&self) -> &str { - Self::NAME + "bind_parallel_low_to_high" } fn description(&self) -> String { @@ -80,18 +76,14 @@ impl Objective for BindLowToHighObjective { } /// Benchmark `DensePolynomial::bind_parallel` with `HighToLow` binding. 
-#[derive(Clone, Copy, Default)] +#[derive(Clone, Copy, PartialEq, Hash)] pub struct BindHighToLowObjective; -impl BindHighToLowObjective { - pub const NAME: &str = "bind_parallel_high_to_low"; -} - impl Objective for BindHighToLowObjective { type Setup = BindSetup; fn name(&self) -> &str { - Self::NAME + "bind_parallel_high_to_low" } fn description(&self) -> String { diff --git a/jolt-eval/src/objective/performance/naive_sort.rs b/jolt-eval/src/objective/performance/naive_sort.rs index 9bf318028..2badc663e 100644 --- a/jolt-eval/src/objective/performance/naive_sort.rs +++ b/jolt-eval/src/objective/performance/naive_sort.rs @@ -3,7 +3,7 @@ use crate::sort_targets::naive_sort; const SORT_DATA_SIZE: usize = 5000; -#[derive(Clone, Copy, Default)] +#[derive(Clone, Copy, PartialEq, Hash)] pub struct NaiveSortObjective; impl Objective for NaiveSortObjective { diff --git a/jolt-eval/src/objective/performance/prover_time.rs b/jolt-eval/src/objective/performance/prover_time.rs index 92031c07c..d5c7f90dd 100644 --- a/jolt-eval/src/objective/performance/prover_time.rs +++ b/jolt-eval/src/objective/performance/prover_time.rs @@ -17,11 +17,13 @@ pub struct ProverTimeSetup { #[derive(Default)] pub struct ProverTimeObjective { guest: G, + name: String, } impl ProverTimeObjective { pub fn new(guest: G) -> Self { - Self { guest } + let name = format!("{} prover time", guest.bench_name()); + Self { guest, name } } } @@ -29,10 +31,7 @@ impl Objective for ProverTimeObjective { type Setup = ProverTimeSetup; fn name(&self) -> &str { - // Leak a string so we can return &str from a computed name. - // This is fine — there are only a handful of objectives. 
- let name = self.guest.bench_name(); - Box::leak(name.into_boxed_str()) + self.name.as_str() } fn setup(&self) -> ProverTimeSetup { From 3f84c42c45b22a6f7b70f58b384f1119d4e5b465 Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Sat, 4 Apr 2026 17:30:59 -0400 Subject: [PATCH 86/86] fix(jolt-eval): restore Default derive on perf objective structs Required by the simple form of bench_objective! which calls ::default(). Co-Authored-By: Claude Opus 4.6 (1M context) --- jolt-eval/src/objective/performance/binding.rs | 4 ++-- jolt-eval/src/objective/performance/naive_sort.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/jolt-eval/src/objective/performance/binding.rs b/jolt-eval/src/objective/performance/binding.rs index f7c43963a..7815bb03f 100644 --- a/jolt-eval/src/objective/performance/binding.rs +++ b/jolt-eval/src/objective/performance/binding.rs @@ -42,7 +42,7 @@ impl BindShared { } /// Benchmark `DensePolynomial::bind_parallel` with `LowToHigh` binding. -#[derive(Clone, Copy, PartialEq, Hash)] +#[derive(Clone, Copy, Default, PartialEq, Hash)] pub struct BindLowToHighObjective; impl Objective for BindLowToHighObjective { @@ -76,7 +76,7 @@ impl Objective for BindLowToHighObjective { } /// Benchmark `DensePolynomial::bind_parallel` with `HighToLow` binding. -#[derive(Clone, Copy, PartialEq, Hash)] +#[derive(Clone, Copy, Default, PartialEq, Hash)] pub struct BindHighToLowObjective; impl Objective for BindHighToLowObjective { diff --git a/jolt-eval/src/objective/performance/naive_sort.rs b/jolt-eval/src/objective/performance/naive_sort.rs index 2badc663e..b022df025 100644 --- a/jolt-eval/src/objective/performance/naive_sort.rs +++ b/jolt-eval/src/objective/performance/naive_sort.rs @@ -3,7 +3,7 @@ use crate::sort_targets::naive_sort; const SORT_DATA_SIZE: usize = 5000; -#[derive(Clone, Copy, PartialEq, Hash)] +#[derive(Clone, Copy, Default, PartialEq, Hash)] pub struct NaiveSortObjective; impl Objective for NaiveSortObjective {