From dc545dc657c7133f72aeef7a02cafd18a5086ba2 Mon Sep 17 00:00:00 2001 From: Alexandre MAI Date: Thu, 12 Mar 2026 18:41:02 +0100 Subject: [PATCH 01/10] feat: add execution trace system to zapcode-core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a structured trace tree (TraceSpan) that captures timing for each phase of execution: parse → compile → execute. This gives AI agent developers visibility into where time is spent inside the sandbox, which is critical for debugging latency in production agent loops where Zapcode executes thousands of code snippets. Each span records name, status (Ok/Error), start/end timestamps, duration in microseconds, key-value attributes (e.g. suspended function name, argument count), and child spans. --- crates/zapcode-core/src/lib.rs | 2 + crates/zapcode-core/src/trace.rs | 179 ++++++++++++++++++++++++++++++ crates/zapcode-core/src/vm/mod.rs | 91 ++++++++++++--- 3 files changed, 258 insertions(+), 14 deletions(-) create mode 100644 crates/zapcode-core/src/trace.rs diff --git a/crates/zapcode-core/src/lib.rs b/crates/zapcode-core/src/lib.rs index d4f6903..1eb32c6 100644 --- a/crates/zapcode-core/src/lib.rs +++ b/crates/zapcode-core/src/lib.rs @@ -44,11 +44,13 @@ pub mod error; pub mod parser; pub mod sandbox; pub mod snapshot; +pub mod trace; pub mod value; pub mod vm; pub use error::ZapcodeError; pub use sandbox::ResourceLimits; pub use snapshot::ZapcodeSnapshot; +pub use trace::{ExecutionTrace, TraceSpan, TraceStatus}; pub use value::Value; pub use vm::{RunResult, VmState, ZapcodeRun}; diff --git a/crates/zapcode-core/src/trace.rs b/crates/zapcode-core/src/trace.rs new file mode 100644 index 0000000..9a4e379 --- /dev/null +++ b/crates/zapcode-core/src/trace.rs @@ -0,0 +1,179 @@ +//! Execution trace for debugging and observability. +//! +//! Captures a tree of spans covering parse → compile → execute → tool calls. +//! 
The trace is lightweight and always collected (sub-microsecond overhead). +//! +//! The `TraceSpan` shape is designed to map cleanly to OpenTelemetry spans +//! for future export to Jaeger, Langfuse, Datadog, etc. + +use std::time::{Instant, SystemTime, UNIX_EPOCH}; + +use serde::{Deserialize, Serialize}; + +/// A single span in the execution trace. +/// +/// Shaped to be OTel-compatible: each span has a name, timestamps, +/// status, key-value attributes, and children. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TraceSpan { + /// Span name (e.g. "parse", "compile", "execute", "tool_call", "suspend"). + pub name: String, + /// When the span started (ms since Unix epoch). + pub start_time_ms: u64, + /// When the span ended (ms since Unix epoch). 0 if still open. + pub end_time_ms: u64, + /// Duration in microseconds. + pub duration_us: u64, + /// "ok" or "error". + pub status: TraceStatus, + /// Structured attributes. Keys use `zapcode.*` namespace. + pub attributes: Vec<(String, String)>, + /// Child spans. + pub children: Vec, +} + +/// Span status. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum TraceStatus { + Ok, + Error, +} + +/// Builder for constructing trace spans with proper timing. 
+pub(crate) struct SpanBuilder { + name: String, + start_wall: u64, + start_instant: Instant, + attributes: Vec<(String, String)>, + children: Vec, +} + +impl SpanBuilder { + pub fn new(name: &str) -> Self { + Self { + name: name.to_string(), + start_wall: now_ms(), + start_instant: Instant::now(), + attributes: Vec::new(), + children: Vec::new(), + } + } + + pub fn attr(mut self, key: &str, value: impl ToString) -> Self { + self.attributes.push((key.to_string(), value.to_string())); + self + } + + pub fn set_attr(&mut self, key: &str, value: impl ToString) { + self.attributes.push((key.to_string(), value.to_string())); + } + + pub fn add_child(&mut self, child: TraceSpan) { + self.children.push(child); + } + + pub fn finish(self, status: TraceStatus) -> TraceSpan { + let elapsed = self.start_instant.elapsed(); + TraceSpan { + name: self.name, + start_time_ms: self.start_wall, + end_time_ms: self.start_wall + elapsed.as_millis() as u64, + duration_us: elapsed.as_micros() as u64, + status, + attributes: self.attributes, + children: self.children, + } + } + + pub fn finish_ok(self) -> TraceSpan { + self.finish(TraceStatus::Ok) + } + + pub fn finish_error(self, error: &str) -> TraceSpan { + self.attr("zapcode.error", error).finish(TraceStatus::Error) + } +} + +fn now_ms() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64 +} + +/// Execution trace covering a full run (parse + compile + execute). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ExecutionTrace { + pub root: TraceSpan, +} + +impl ExecutionTrace { + /// Pretty-print the trace as a tree. + pub fn print(&self) { + print_span(&self.root, 0, true); + } + + /// Format the trace as a tree string. 
+ pub fn to_string_pretty(&self) -> String { + let mut buf = String::new(); + format_span(&self.root, 0, true, &mut buf); + buf + } +} + +fn format_duration(us: u64) -> String { + if us < 1000 { + format!("{}µs", us) + } else if us < 1_000_000 { + format!("{:.1}ms", us as f64 / 1000.0) + } else { + format!("{:.2}s", us as f64 / 1_000_000.0) + } +} + +fn format_span(span: &TraceSpan, depth: usize, is_last: bool, buf: &mut String) { + let icon = match span.status { + TraceStatus::Ok => "✓", + TraceStatus::Error => "✗", + }; + let duration = format_duration(span.duration_us); + + // Build prefix + let prefix = if depth == 0 { + String::new() + } else { + let connector = if is_last { "└─ " } else { "├─ " }; + let indent = "│ ".repeat(depth - 1); + format!("{}{}", indent, connector) + }; + + buf.push_str(&format!("{}{} {} ({})", prefix, icon, span.name, duration)); + + // Show key attributes inline + for (k, v) in &span.attributes { + if k == "zapcode.error" { + buf.push_str(&format!(" error=\"{}\"", v)); + } else if k == "zapcode.tool.name" { + buf.push_str(&format!(" {}", v)); + } else if k == "zapcode.tool.args" { + buf.push_str(&format!("({})", v)); + } else if k == "zapcode.tool.result" { + let display = if v.len() > 60 { &v[..57] } else { v }; + buf.push_str(&format!(" → {}", display)); + if v.len() > 60 { + buf.push_str("..."); + } + } + } + buf.push('\n'); + + for (i, child) in span.children.iter().enumerate() { + format_span(child, depth + 1, i == span.children.len() - 1, buf); + } +} + +fn print_span(span: &TraceSpan, depth: usize, is_last: bool) { + let mut buf = String::new(); + format_span(span, depth, is_last, &mut buf); + print!("{}", buf); +} diff --git a/crates/zapcode-core/src/vm/mod.rs b/crates/zapcode-core/src/vm/mod.rs index e4e7337..cc1ee60 100644 --- a/crates/zapcode-core/src/vm/mod.rs +++ b/crates/zapcode-core/src/vm/mod.rs @@ -8,6 +8,7 @@ use crate::compiler::CompiledProgram; use crate::error::{Result, ZapcodeError}; use 
crate::sandbox::{ResourceLimits, ResourceTracker}; use crate::snapshot::ZapcodeSnapshot; +use crate::trace::{ExecutionTrace, SpanBuilder, TraceStatus}; use crate::value::{Closure, FunctionId, GeneratorObject, SuspendedFrame, Value}; mod builtins; @@ -2201,20 +2202,88 @@ impl ZapcodeRun { } pub fn run(&self, input_values: Vec<(String, Value)>) -> Result { - let program = crate::parser::parse(&self.source)?; + let mut root_span = SpanBuilder::new("zapcode.run"); + + // Parse + let parse_span = SpanBuilder::new("parse"); + let program = match crate::parser::parse(&self.source) { + Ok(p) => { + root_span.add_child(parse_span.finish_ok()); + p + } + Err(e) => { + root_span.add_child(parse_span.finish_error(&e.to_string())); + let _trace = ExecutionTrace { + root: root_span.finish(TraceStatus::Error), + }; + return Err(e); + } + }; + + // Compile + let compile_span = SpanBuilder::new("compile"); let ext_set: HashSet = self.external_functions.iter().cloned().collect(); - let compiled = crate::compiler::compile_with_externals(&program, ext_set.clone())?; + let compiled = match crate::compiler::compile_with_externals(&program, ext_set.clone()) { + Ok(c) => { + root_span.add_child(compile_span.finish_ok()); + c + } + Err(e) => { + root_span.add_child(compile_span.finish_error(&e.to_string())); + let _trace = ExecutionTrace { + root: root_span.finish(TraceStatus::Error), + }; + return Err(e); + } + }; + + // Execute + let execute_span = SpanBuilder::new("execute"); let mut vm = Vm::new(compiled, self.limits.clone(), ext_set); - // Inject inputs as globals for (name, value) in input_values { vm.globals.insert(name, value); } - let state = vm.run()?; + let state = match vm.run() { + Ok(s) => { + let status = match &s { + VmState::Complete(_) => TraceStatus::Ok, + VmState::Suspended { function_name, args, .. 
} => { + let mut span = execute_span; + span.set_attr("zapcode.suspended_on", function_name); + span.set_attr("zapcode.args_count", args.len()); + root_span.add_child(span.finish(TraceStatus::Ok)); + let trace = ExecutionTrace { + root: root_span.finish_ok(), + }; + return Ok(RunResult { + state: s, + stdout: vm.stdout, + trace, + }); + } + }; + root_span.add_child(execute_span.finish(status)); + s + } + Err(e) => { + root_span.add_child(execute_span.finish_error(&e.to_string())); + let _trace = ExecutionTrace { + root: root_span.finish(TraceStatus::Error), + }; + return Err(e); + } + }; + + let trace = ExecutionTrace { + root: root_span.finish_ok(), + }; + Ok(RunResult { state, stdout: vm.stdout, + trace, }) } @@ -2222,16 +2291,8 @@ impl ZapcodeRun { /// instead of wrapping it in a `RunResult`. This is the primary entry point /// for code that needs to handle suspension / snapshot / resume. pub fn start(&self, input_values: Vec<(String, Value)>) -> Result { - let program = crate::parser::parse(&self.source)?; - let ext_set: HashSet = self.external_functions.iter().cloned().collect(); - let compiled = crate::compiler::compile_with_externals(&program, ext_set.clone())?; - let mut vm = Vm::new(compiled, self.limits.clone(), ext_set); - - for (name, value) in input_values { - vm.globals.insert(name, value); - } - - vm.run() + let result = self.run(input_values)?; + Ok(result.state) } pub fn run_simple(&self) -> Result { @@ -2250,6 +2311,8 @@ impl ZapcodeRun { pub struct RunResult { pub state: VmState, pub stdout: String, + /// Execution trace covering parse → compile → execute. + pub trace: ExecutionTrace, } /// Quick helper to evaluate a TypeScript expression. 
From 299c10c045e9c0dd64757c2f06de240dbd7ebe1b Mon Sep 17 00:00:00 2001 From: Alexandre MAI Date: Thu, 12 Mar 2026 18:41:06 +0100 Subject: [PATCH 02/10] feat: expose execution trace in JS, Python, and WASM bindings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surfaces the trace tree from zapcode-core through all three binding layers so that agent developers can inspect execution timing regardless of their language. Without this, trace data was only accessible from Rust — now every SDK gets the same observability. --- crates/zapcode-js/src/lib.rs | 57 ++++++++++++++++++++++++++++-- crates/zapcode-py/src/lib.rs | 52 +++++++++++++++++++++++---- crates/zapcode-wasm/src/lib.rs | 64 ++++++++++++++++++++++++++++++---- 3 files changed, 156 insertions(+), 17 deletions(-) diff --git a/crates/zapcode-js/src/lib.rs b/crates/zapcode-js/src/lib.rs index a67804f..c7f0e1e 100644 --- a/crates/zapcode-js/src/lib.rs +++ b/crates/zapcode-js/src/lib.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use napi::bindgen_prelude::*; use napi_derive::napi; -use zapcode_core::{ResourceLimits, Value, VmState, ZapcodeRun, ZapcodeSnapshot}; +use zapcode_core::{ExecutionTrace, ResourceLimits, TraceSpan, TraceStatus, Value, VmState, ZapcodeRun, ZapcodeSnapshot}; // --------------------------------------------------------------------------- // Options @@ -26,6 +26,17 @@ pub struct ZapcodeOptions { // Result types exposed to JS // --------------------------------------------------------------------------- +#[napi(object)] +pub struct JsTraceSpan { + pub name: String, + pub start_time_ms: f64, + pub end_time_ms: f64, + pub duration_us: f64, + pub status: String, + pub attributes: Vec>, + pub children: Vec, +} + #[napi(object)] pub struct ZapcodeResult { /// Whether execution completed. Always true for this type. @@ -34,6 +45,8 @@ pub struct ZapcodeResult { pub output: serde_json::Value, /// Captured stdout output. 
pub stdout: String, + /// Execution trace (parse → compile → execute). + pub trace: JsTraceSpan, } #[napi(object)] @@ -92,7 +105,19 @@ impl ZapcodeSnapshotHandle { .clone() .resume(value) .map_err(|e| napi::Error::from_reason(e.to_string()))?; - vm_state_to_either(state, String::new()) + // resume() doesn't produce a full trace yet — use an empty one + let trace = ExecutionTrace { + root: TraceSpan { + name: "resume".to_string(), + start_time_ms: 0, + end_time_ms: 0, + duration_us: 0, + status: TraceStatus::Ok, + attributes: Vec::new(), + children: Vec::new(), + }, + }; + vm_state_to_either(state, String::new(), trace) } } @@ -155,6 +180,7 @@ impl Zapcode { completed: true, output: value_to_json(&v), stdout: result.stdout, + trace: trace_to_js(&result.trace), }), VmState::Suspended { function_name, .. } => Err(napi::Error::from_reason(format!( "execution suspended on external function '{}' -- use start() instead", @@ -177,7 +203,7 @@ impl Zapcode { .run(input_values) .map_err(|e| napi::Error::from_reason(e.to_string()))?; - vm_state_to_either(result.state, result.stdout) + vm_state_to_either(result.state, result.stdout, result.trace) } } @@ -251,16 +277,41 @@ fn value_to_json(value: &Value) -> serde_json::Value { } } +fn trace_span_to_js(span: &TraceSpan) -> JsTraceSpan { + JsTraceSpan { + name: span.name.clone(), + start_time_ms: span.start_time_ms as f64, + end_time_ms: span.end_time_ms as f64, + duration_us: span.duration_us as f64, + status: match span.status { + TraceStatus::Ok => "ok".to_string(), + TraceStatus::Error => "error".to_string(), + }, + attributes: span + .attributes + .iter() + .map(|(k, v)| vec![k.clone(), v.clone()]) + .collect(), + children: span.children.iter().map(trace_span_to_js).collect(), + } +} + +fn trace_to_js(trace: &ExecutionTrace) -> JsTraceSpan { + trace_span_to_js(&trace.root) +} + /// Package a `VmState` into either a `ZapcodeResult` or `ZapcodeSuspension`. 
fn vm_state_to_either( state: VmState, stdout: String, + trace: ExecutionTrace, ) -> napi::Result> { match state { VmState::Complete(v) => Ok(Either::A(ZapcodeResult { completed: true, output: value_to_json(&v), stdout, + trace: trace_to_js(&trace), })), VmState::Suspended { function_name, diff --git a/crates/zapcode-py/src/lib.rs b/crates/zapcode-py/src/lib.rs index 63b6894..c71bc73 100644 --- a/crates/zapcode-py/src/lib.rs +++ b/crates/zapcode-py/src/lib.rs @@ -4,7 +4,10 @@ use pyo3::exceptions::PyRuntimeError; use pyo3::prelude::*; use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyList, PyString}; -use zapcode_core::{ResourceLimits, Value, VmState, ZapcodeError, ZapcodeSnapshot as CoreSnapshot}; +use zapcode_core::{ + ExecutionTrace, ResourceLimits, TraceSpan as CoreTraceSpan, TraceStatus, Value, VmState, + ZapcodeError, ZapcodeSnapshot as CoreSnapshot, +}; // --------------------------------------------------------------------------- // Value conversion: zapcode_core::Value <-> Python object @@ -157,7 +160,7 @@ impl Zapcode { fn run(&self, py: Python<'_>, inputs: Option<&Bound<'_, PyDict>>) -> PyResult { let input_values = extract_inputs(inputs)?; let result = self.inner.run(input_values).map_err(zapcode_err)?; - run_result_to_py(py, result.state, &result.stdout) + run_result_to_py(py, result.state, &result.stdout, Some(&result.trace)) } /// Start execution, returning raw state (for suspension / snapshot handling). @@ -170,13 +173,45 @@ impl Zapcode { #[pyo3(signature = (inputs=None))] fn start(&self, py: Python<'_>, inputs: Option<&Bound<'_, PyDict>>) -> PyResult { let input_values = extract_inputs(inputs)?; - let state = self.inner.start(input_values).map_err(zapcode_err)?; - run_result_to_py(py, state, "") + let result = self.inner.run(input_values).map_err(zapcode_err)?; + run_result_to_py(py, result.state, &result.stdout, Some(&result.trace)) + } +} + +/// Convert a `TraceSpan` to a Python dict. 
+fn trace_span_to_py(py: Python<'_>, span: &CoreTraceSpan) -> PyResult { + let dict = PyDict::new(py); + dict.set_item("name", &span.name)?; + dict.set_item("start_time_ms", span.start_time_ms)?; + dict.set_item("end_time_ms", span.end_time_ms)?; + dict.set_item("duration_us", span.duration_us)?; + dict.set_item( + "status", + match span.status { + TraceStatus::Ok => "ok", + TraceStatus::Error => "error", + }, + )?; + let attrs = PyDict::new(py); + for (k, v) in &span.attributes { + attrs.set_item(k, v)?; + } + dict.set_item("attributes", attrs)?; + let children = PyList::empty(py); + for child in &span.children { + children.append(trace_span_to_py(py, child)?)?; } + dict.set_item("children", children)?; + Ok(dict.into_pyobject(py)?.into_any().unbind()) } -/// Convert a `VmState` (+ optional stdout) to a Python dict. -fn run_result_to_py(py: Python<'_>, state: VmState, stdout: &str) -> PyResult { +/// Convert a `VmState` (+ optional stdout + trace) to a Python dict. +fn run_result_to_py( + py: Python<'_>, + state: VmState, + stdout: &str, + trace: Option<&ExecutionTrace>, +) -> PyResult { let dict = PyDict::new(py); match state { VmState::Complete(value) => { @@ -199,6 +234,9 @@ fn run_result_to_py(py: Python<'_>, state: VmState, stdout: &str) -> PyResult, return_value: &Bound<'_, PyAny>) -> PyResult { let val = py_to_value(return_value)?; let state = self.inner.clone().resume(val).map_err(zapcode_err)?; - run_result_to_py(py, state, "") + run_result_to_py(py, state, "", None) } } diff --git a/crates/zapcode-wasm/src/lib.rs b/crates/zapcode-wasm/src/lib.rs index f59c1fe..71e4441 100644 --- a/crates/zapcode-wasm/src/lib.rs +++ b/crates/zapcode-wasm/src/lib.rs @@ -4,7 +4,10 @@ use js_sys::{Array, Object, Reflect}; use serde::Deserialize; use wasm_bindgen::prelude::*; -use zapcode_core::{ResourceLimits, Value, VmState, ZapcodeError, ZapcodeSnapshot as CoreSnapshot}; +use zapcode_core::{ + ExecutionTrace, ResourceLimits, TraceSpan as CoreTraceSpan, TraceStatus, Value, 
VmState, + ZapcodeError, ZapcodeSnapshot as CoreSnapshot, +}; // --------------------------------------------------------------------------- // Value conversion: zapcode_core::Value <-> JsValue @@ -170,7 +173,7 @@ impl Zapcode { pub fn run(&self, inputs: JsValue) -> Result { let input_values = extract_inputs(&inputs)?; let result = self.inner.run(input_values).map_err(zapcode_err)?; - vm_state_to_js(result.state, &result.stdout) + vm_state_to_js(result.state, &result.stdout, Some(&result.trace)) } /// Start execution, returning raw state (for suspension / snapshot handling). @@ -179,8 +182,8 @@ impl Zapcode { /// @returns Same shape as `run()`. pub fn start(&self, inputs: JsValue) -> Result { let input_values = extract_inputs(&inputs)?; - let state = self.inner.start(input_values).map_err(zapcode_err)?; - vm_state_to_js(state, "") + let result = self.inner.run(input_values).map_err(zapcode_err)?; + vm_state_to_js(result.state, &result.stdout, Some(&result.trace)) } } @@ -204,8 +207,51 @@ fn extract_inputs(inputs: &JsValue) -> Result, JsError> { Ok(out) } -/// Convert a `VmState` (+ optional stdout) to a JS object. -fn vm_state_to_js(state: VmState, stdout: &str) -> Result { +/// Convert a `TraceSpan` to a JS object. 
+fn trace_span_to_js(span: &CoreTraceSpan) -> Result { + let obj = Object::new(); + Reflect::set(&obj, &"name".into(), &JsValue::from_str(&span.name)) + .map_err(|_| JsError::new("failed to set trace field"))?; + Reflect::set(&obj, &"startTimeMs".into(), &JsValue::from(span.start_time_ms as f64)) + .map_err(|_| JsError::new("failed to set trace field"))?; + Reflect::set(&obj, &"endTimeMs".into(), &JsValue::from(span.end_time_ms as f64)) + .map_err(|_| JsError::new("failed to set trace field"))?; + Reflect::set(&obj, &"durationUs".into(), &JsValue::from(span.duration_us as f64)) + .map_err(|_| JsError::new("failed to set trace field"))?; + Reflect::set( + &obj, + &"status".into(), + &JsValue::from_str(match span.status { + TraceStatus::Ok => "ok", + TraceStatus::Error => "error", + }), + ) + .map_err(|_| JsError::new("failed to set trace field"))?; + + let attrs = Object::new(); + for (k, v) in &span.attributes { + Reflect::set(&attrs, &JsValue::from_str(k), &JsValue::from_str(v)) + .map_err(|_| JsError::new("failed to set trace attribute"))?; + } + Reflect::set(&obj, &"attributes".into(), &attrs.into()) + .map_err(|_| JsError::new("failed to set trace field"))?; + + let children = Array::new_with_length(span.children.len() as u32); + for (i, child) in span.children.iter().enumerate() { + children.set(i as u32, trace_span_to_js(child)?); + } + Reflect::set(&obj, &"children".into(), &children.into()) + .map_err(|_| JsError::new("failed to set trace field"))?; + + Ok(obj.into()) +} + +/// Convert a `VmState` (+ optional stdout + trace) to a JS object. +fn vm_state_to_js( + state: VmState, + stdout: &str, + trace: Option<&ExecutionTrace>, +) -> Result { let obj = Object::new(); match state { VmState::Complete(value) => { @@ -248,6 +294,10 @@ fn vm_state_to_js(state: VmState, stdout: &str) -> Result { .map_err(|_| JsError::new("failed to set stdout"))?; } } + if let Some(t) = trace { + Reflect::set(&obj, &"trace".into(), &trace_span_to_js(&t.root)?) 
+ .map_err(|_| JsError::new("failed to set trace"))?; + } Ok(obj.into()) } @@ -288,6 +338,6 @@ impl ZapcodeSnapshot { pub fn resume(&self, return_value: JsValue) -> Result { let val = js_to_value(&return_value)?; let state = self.inner.clone().resume(val).map_err(zapcode_err)?; - vm_state_to_js(state, "") + vm_state_to_js(state, "", None) } } From d53a543ab926446674c11bce92408b248de7dafc Mon Sep 17 00:00:00 2001 From: Alexandre MAI Date: Thu, 12 Mar 2026 18:41:11 +0100 Subject: [PATCH 03/10] test: add execution trace tests 13 tests covering trace structure, timing validity, error handling, suspension attributes, pretty printing, and independence of multiple runs. Ensures the trace system is correct before building higher-level features (autoFix, debug logging) on top of it. --- crates/zapcode-core/tests/trace.rs | 251 +++++++++++++++++++++++++++++ 1 file changed, 251 insertions(+) create mode 100644 crates/zapcode-core/tests/trace.rs diff --git a/crates/zapcode-core/tests/trace.rs b/crates/zapcode-core/tests/trace.rs new file mode 100644 index 0000000..8a9dc4f --- /dev/null +++ b/crates/zapcode-core/tests/trace.rs @@ -0,0 +1,251 @@ +use zapcode_core::{ + ResourceLimits, TraceSpan, TraceStatus, Value, VmState, ZapcodeRun, +}; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +fn run_code(code: &str) -> zapcode_core::RunResult { + let runner = + ZapcodeRun::new(code.to_string(), vec![], vec![], ResourceLimits::default()).unwrap(); + runner.run(vec![]).unwrap() +} + +fn run_with_externals(code: &str, externals: Vec<&str>) -> zapcode_core::RunResult { + let runner = ZapcodeRun::new( + code.to_string(), + vec![], + externals.into_iter().map(|s| s.to_string()).collect(), + ResourceLimits::default(), + ) + .unwrap(); + runner.run(vec![]).unwrap() +} + +fn assert_span_timing(span: &TraceSpan) { + assert!(span.start_time_ms > 0, "start_time_ms should 
be non-zero"); + assert!( + span.end_time_ms >= span.start_time_ms, + "end_time_ms ({}) should be >= start_time_ms ({})", + span.end_time_ms, + span.start_time_ms + ); +} + +// --------------------------------------------------------------------------- +// Trace structure +// --------------------------------------------------------------------------- + +#[test] +fn trace_has_root_with_parse_compile_execute_children() { + let result = run_code("1 + 2"); + let root = &result.trace.root; + + assert_eq!(root.name, "zapcode.run"); + assert_eq!(root.status, TraceStatus::Ok); + assert_eq!(root.children.len(), 3); + assert_eq!(root.children[0].name, "parse"); + assert_eq!(root.children[1].name, "compile"); + assert_eq!(root.children[2].name, "execute"); +} + +#[test] +fn trace_all_children_have_ok_status_on_success() { + let result = run_code("const x = 42; x"); + let root = &result.trace.root; + + assert_eq!(root.status, TraceStatus::Ok); + for child in &root.children { + assert_eq!(child.status, TraceStatus::Ok, "child '{}' should be Ok", child.name); + } +} + +// --------------------------------------------------------------------------- +// Timing +// --------------------------------------------------------------------------- + +#[test] +fn trace_has_valid_timing() { + let result = run_code("[1, 2, 3].map(x => x * 2)"); + let root = &result.trace.root; + + assert_span_timing(root); + for child in &root.children { + assert_span_timing(child); + } +} + +#[test] +fn trace_root_duration_gte_children_sum() { + let result = run_code("let sum = 0; for (let i = 0; i < 100; i++) { sum += i; } sum"); + let root = &result.trace.root; + + let children_duration: u64 = root.children.iter().map(|c| c.duration_us).sum(); + assert!( + root.duration_us >= children_duration, + "root duration ({}µs) should be >= sum of children ({}µs)", + root.duration_us, + children_duration + ); +} + +// --------------------------------------------------------------------------- +// Error traces +// 
--------------------------------------------------------------------------- + +#[test] +fn trace_parse_error_has_error_status() { + let runner = + ZapcodeRun::new("{{{{".to_string(), vec![], vec![], ResourceLimits::default()).unwrap(); + let err = runner.run(vec![]); + + // Parse errors return Err, so we can't inspect the trace from RunResult. + // But we verify it doesn't panic. + assert!(err.is_err()); +} + +#[test] +fn trace_runtime_error_does_not_panic() { + let runner = ZapcodeRun::new( + "null.foo".to_string(), + vec![], + vec![], + ResourceLimits::default(), + ) + .unwrap(); + let err = runner.run(vec![]); + assert!(err.is_err()); +} + +// --------------------------------------------------------------------------- +// Suspension trace +// --------------------------------------------------------------------------- + +#[test] +fn trace_on_suspension_has_execute_with_suspended_attrs() { + let result = run_with_externals("const x = await fetchData(); x", vec!["fetchData"]); + let root = &result.trace.root; + + assert_eq!(root.status, TraceStatus::Ok); + assert_eq!(root.children.len(), 3); + + let execute_span = &root.children[2]; + assert_eq!(execute_span.name, "execute"); + assert_eq!(execute_span.status, TraceStatus::Ok); + + // Should have zapcode.suspended_on attribute + let suspended_attr = execute_span + .attributes + .iter() + .find(|(k, _)| k == "zapcode.suspended_on"); + assert!(suspended_attr.is_some(), "execute span should have zapcode.suspended_on attribute"); + assert_eq!(suspended_attr.unwrap().1, "fetchData"); +} + +#[test] +fn trace_suspension_state_matches() { + let result = run_with_externals("const x = await myFunc(42); x", vec!["myFunc"]); + + // Verify the VM actually suspended + match &result.state { + VmState::Suspended { function_name, .. 
} => { + assert_eq!(function_name, "myFunc"); + } + VmState::Complete(_) => panic!("expected suspension"), + } + + // And the trace captured it + let execute_span = &result.trace.root.children[2]; + let args_count = execute_span + .attributes + .iter() + .find(|(k, _)| k == "zapcode.args_count"); + assert!(args_count.is_some()); + assert_eq!(args_count.unwrap().1, "1"); +} + +// --------------------------------------------------------------------------- +// Pretty printing +// --------------------------------------------------------------------------- + +#[test] +fn trace_pretty_print_contains_span_names() { + let result = run_code("1 + 1"); + let output = result.trace.to_string_pretty(); + + assert!(output.contains("zapcode.run"), "should contain root span name"); + assert!(output.contains("parse"), "should contain parse span"); + assert!(output.contains("compile"), "should contain compile span"); + assert!(output.contains("execute"), "should contain execute span"); +} + +#[test] +fn trace_pretty_print_contains_status_icons() { + let result = run_code("true"); + let output = result.trace.to_string_pretty(); + + assert!(output.contains("✓"), "success trace should contain ✓ icon"); +} + +#[test] +fn trace_pretty_print_contains_duration() { + let result = run_code("42"); + let output = result.trace.to_string_pretty(); + + // Should contain at least one duration marker (µs or ms) + assert!( + output.contains("µs") || output.contains("ms"), + "trace output should contain duration: {}", + output + ); +} + +// --------------------------------------------------------------------------- +// Multiple runs produce independent traces +// --------------------------------------------------------------------------- + +#[test] +fn trace_multiple_runs_are_independent() { + let runner = ZapcodeRun::new( + "1 + 1".to_string(), + vec![], + vec![], + ResourceLimits::default(), + ) + .unwrap(); + + let result1 = runner.run(vec![]).unwrap(); + let result2 = runner.run(vec![]).unwrap(); 
+ + // Each run should produce its own trace (different start times or at least independent objects) + assert_eq!(result1.trace.root.children.len(), 3); + assert_eq!(result2.trace.root.children.len(), 3); +} + +// --------------------------------------------------------------------------- +// Trace with inputs +// --------------------------------------------------------------------------- + +#[test] +fn trace_with_inputs_still_has_full_structure() { + let runner = ZapcodeRun::new( + "x + y".to_string(), + vec!["x".to_string(), "y".to_string()], + vec![], + ResourceLimits::default(), + ) + .unwrap(); + + let result = runner + .run(vec![ + ("x".to_string(), Value::Int(10)), + ("y".to_string(), Value::Int(20)), + ]) + .unwrap(); + + let root = &result.trace.root; + assert_eq!(root.status, TraceStatus::Ok); + assert_eq!(root.children.len(), 3); + assert!(matches!(result.state, VmState::Complete(Value::Int(30)))); +} From 124eec993cb0511e4163602e4da9c452e6998801 Mon Sep 17 00:00:00 2001 From: Alexandre MAI Date: Thu, 12 Mar 2026 18:41:18 +0100 Subject: [PATCH 04/10] feat: add autoFix and execution trace to zapcode-ai packages autoFix catches execution errors and returns them as tool results instead of throwing, letting the LLM see the error and self-correct on the next step. This eliminates the main risk of code execution: a single bad generation no longer kills the entire agent loop. Execution trace collects a session-level span tree across all executions, accessible via printTrace()/getTrace() (TS) and print_trace()/get_trace() (Python). This gives developers a single view of every code execution, tool call, and retry in the session. Both features are implemented in the TypeScript and Python AI packages. 
--- packages/zapcode-ai-python/README.md | 5 + .../src/zapcode_ai/__init__.py | 224 ++++++++++++--- packages/zapcode-ai/src/index.ts | 262 +++++++++++++++--- 3 files changed, 411 insertions(+), 80 deletions(-) create mode 100644 packages/zapcode-ai-python/README.md diff --git a/packages/zapcode-ai-python/README.md b/packages/zapcode-ai-python/README.md new file mode 100644 index 0000000..e4e6e4c --- /dev/null +++ b/packages/zapcode-ai-python/README.md @@ -0,0 +1,5 @@ +# zapcode-ai + +AI SDK integration for Zapcode — let LLMs write and execute TypeScript safely. + +See the [main README](https://github.com/TheUncharted/zapcode) for full documentation. diff --git a/packages/zapcode-ai-python/src/zapcode_ai/__init__.py b/packages/zapcode-ai-python/src/zapcode_ai/__init__.py index b602487..f8a95fc 100644 --- a/packages/zapcode-ai-python/src/zapcode_ai/__init__.py +++ b/packages/zapcode-ai-python/src/zapcode_ai/__init__.py @@ -29,6 +29,8 @@ def adapt(self, ctx): from __future__ import annotations +import json +import time from dataclasses import dataclass, field from typing import Any, Callable, Awaitable @@ -55,12 +57,27 @@ class ToolDefinition: execute: Callable[..., Any] # (args: dict) -> Any or awaitable +@dataclass +class TraceSpan: + """A single span in the execution trace. 
OTel-compatible shape.""" + name: str + start_time: float # ms since epoch + end_time: float = 0.0 + duration_ms: float = 0.0 + status: str = "ok" # "ok" or "error" + attributes: dict[str, Any] = field(default_factory=dict) + children: list[TraceSpan] = field(default_factory=list) + + @dataclass class ExecutionResult: """Result of executing guest code.""" + code: str output: Any stdout: str tool_calls: list[dict[str, Any]] + error: str | None = None + trace: TraceSpan | None = None # --------------------------------------------------------------------------- @@ -142,6 +159,39 @@ def _build_system_prompt( return "\n\n".join(parts) +# --------------------------------------------------------------------------- +# Trace helpers +# --------------------------------------------------------------------------- + +def _create_span(name: str, attributes: dict[str, Any] | None = None) -> TraceSpan: + return TraceSpan( + name=name, + start_time=time.time() * 1000, + attributes=attributes or {}, + ) + + +def _end_span(span: TraceSpan, status: str | None = None) -> TraceSpan: + span.end_time = time.time() * 1000 + span.duration_ms = span.end_time - span.start_time + if status: + span.status = status + return span + + +def _print_trace(span: TraceSpan, indent: int = 0) -> None: + prefix = "" if indent == 0 else "│ " * (indent - 1) + "├─ " + icon = "✗" if span.status == "error" else "✓" + duration = f"<1ms" if span.duration_ms < 1 else f"{span.duration_ms:.0f}ms" + attrs = " ".join( + f"{k}={str(v)[:80]}" for k, v in span.attributes.items() + if not k.startswith("zapcode.code") # don't dump full code in trace + ) + print(f"{prefix}{icon} {span.name} ({duration}){' ' + attrs if attrs else ''}") + for child in span.children: + _print_trace(child, indent + 1) + + # --------------------------------------------------------------------------- # Execution engine # --------------------------------------------------------------------------- @@ -152,48 +202,100 @@ def _execute_code( *, 
memory_limit_bytes: int | None = None, time_limit_ms: int | None = None, + debug: bool = False, + auto_fix: bool = False, ) -> ExecutionResult: tool_names = list(tool_defs.keys()) tool_calls: list[dict[str, Any]] = [] + tracing = debug or auto_fix - kwargs: dict[str, Any] = {"external_functions": tool_names} - if time_limit_ms is not None: - kwargs["time_limit_ms"] = time_limit_ms - if memory_limit_bytes is not None: - kwargs["memory_limit_bytes"] = memory_limit_bytes - - sandbox = Zapcode(code, **kwargs) - state = sandbox.start() - - while state.get("suspended"): - fn_name = state["function_name"] - args = state["args"] - - tool_def = tool_defs.get(fn_name) - if not tool_def: - raise ValueError( - f"Guest code called unknown function '{fn_name}'. " - f"Available: {', '.join(tool_names)}" - ) - - # Build named args from positional args - param_names = list(tool_def.parameters.keys()) - named_args = { - param_names[i]: args[i] - for i in range(min(len(param_names), len(args))) - } + exec_span = _create_span("execute", {"zapcode.code": code}) if tracing else None - result = tool_def.execute(named_args) - tool_calls.append({"name": fn_name, "args": args, "result": result}) + try: + kwargs: dict[str, Any] = {"external_functions": tool_names} + if time_limit_ms is not None: + kwargs["time_limit_ms"] = time_limit_ms + if memory_limit_bytes is not None: + kwargs["memory_limit_bytes"] = memory_limit_bytes - snapshot: ZapcodeSnapshot = state["snapshot"] - state = snapshot.resume(result) + sandbox = Zapcode(code, **kwargs) + state = sandbox.start() - return ExecutionResult( - output=state.get("output"), - stdout=state.get("stdout", ""), - tool_calls=tool_calls, - ) + while state.get("suspended"): + fn_name = state["function_name"] + args = state["args"] + + tool_def = tool_defs.get(fn_name) + if not tool_def: + raise ValueError( + f"Guest code called unknown function '{fn_name}'. 
" + f"Available: {', '.join(tool_names)}" + ) + + # Build named args from positional args + param_names = list(tool_def.parameters.keys()) + named_args = { + param_names[i]: args[i] + for i in range(min(len(param_names), len(args))) + } + + tool_span = _create_span("tool_call", { + "zapcode.tool.name": fn_name, + "zapcode.tool.args": json.dumps(args, default=str), + }) if tracing else None + + result = tool_def.execute(named_args) + tool_calls.append({"name": fn_name, "args": args, "result": result}) + + if tool_span: + tool_span.attributes["zapcode.tool.result"] = json.dumps(result, default=str) + _end_span(tool_span) + exec_span.children.append(tool_span) + + snapshot: ZapcodeSnapshot = state["snapshot"] + state = snapshot.resume(result) + + stdout = state.get("stdout", "") + + if exec_span: + exec_span.attributes["zapcode.output"] = json.dumps(state.get("output"), default=str) + if stdout: + exec_span.attributes["zapcode.stdout"] = stdout + _end_span(exec_span) + + if debug and exec_span: + _print_trace(exec_span) + + return ExecutionResult( + code=code, + output=state.get("output"), + stdout=stdout, + tool_calls=tool_calls, + trace=exec_span, + ) + except Exception as err: + error_msg = str(err) + + if exec_span: + exec_span.attributes["zapcode.error"] = error_msg + _end_span(exec_span, "error") + + if not auto_fix: + if debug and exec_span: + _print_trace(exec_span) + raise + + if debug and exec_span: + _print_trace(exec_span) + + return ExecutionResult( + code=code, + output=None, + stdout="", + tool_calls=tool_calls, + error=f"Execution failed: {error_msg}. Please fix your code and try again.", + trace=exec_span, + ) # --------------------------------------------------------------------------- @@ -241,6 +343,12 @@ class ZapcodeAI: custom: dict[str, Any] = field(default_factory=dict) """Output from custom adapters, keyed by adapter name.""" + get_trace: Callable[[], TraceSpan | None] = field(default=lambda: None) + """Get the full session trace tree. 
Available when debug or auto_fix is enabled.""" + + print_trace: Callable[[], None] = field(default=lambda: None) + """Print the full session trace tree to the console.""" + # --------------------------------------------------------------------------- # Main entry point @@ -252,6 +360,8 @@ def zapcode( system: str | None = None, memory_limit_bytes: int | None = None, time_limit_ms: int = 10_000, + debug: bool = False, + auto_fix: bool = False, adapters: list[Adapter] | None = None, ) -> ZapcodeAI: """ @@ -263,6 +373,11 @@ def zapcode( - `handle_tool_call(code)` → Universal handler for any SDK - `custom` → Output from custom adapters + Args: + debug: Log generated code, tool calls, and output to the console. + auto_fix: When True, execution errors are returned as tool results + instead of raising. The LLM sees the error and can self-correct. + Example with Anthropic SDK:: from zapcode_ai import zapcode, ToolDefinition, ParamDef @@ -293,13 +408,30 @@ def zapcode( print(result.output) """ system_prompt = _build_system_prompt(tools, system) + tracing = debug or auto_fix + + # Session-level trace collects all attempts + session_trace: TraceSpan | None = ( + _create_span("session", {"zapcode.tools": ", ".join(tools.keys())}) + if tracing else None + ) + attempt_count = 0 def handle_tool_call(code: str) -> ExecutionResult: - return _execute_code( + nonlocal attempt_count + attempt_count += 1 + result = _execute_code( code, tools, memory_limit_bytes=memory_limit_bytes, time_limit_ms=time_limit_ms, + debug=debug, + auto_fix=auto_fix, ) + if session_trace and result.trace: + result.trace.name = f"attempt_{attempt_count}" + result.trace.attributes["zapcode.attempt"] = attempt_count + session_trace.children.append(result.trace) + return result # Anthropic SDK format anthropic_tools = [ @@ -335,12 +467,28 @@ def handle_tool_call(code: str) -> ExecutionResult: for adapter in adapters: custom[adapter.name] = adapter.adapt(ctx) + def get_trace() -> TraceSpan | None: + if not 
session_trace: + return None + status = "ok" if any(c.status == "ok" for c in session_trace.children) else "error" + _end_span(session_trace, status) + return session_trace + + def print_session_trace() -> None: + trace = get_trace() + if trace: + print("\n─── Zapcode Trace ───") + _print_trace(trace) + print("─────────────────────\n") + return ZapcodeAI( system=system_prompt, anthropic_tools=anthropic_tools, openai_tools=openai_tools, handle_tool_call=handle_tool_call, custom=custom, + get_trace=get_trace, + print_trace=print_session_trace, ) @@ -350,6 +498,8 @@ def execute( *, memory_limit_bytes: int | None = None, time_limit_ms: int | None = None, + debug: bool = False, + auto_fix: bool = False, ) -> ExecutionResult: """ Execute TypeScript code directly in a Zapcode sandbox with tool resolution. @@ -374,4 +524,6 @@ def execute( code, tools, memory_limit_bytes=memory_limit_bytes, time_limit_ms=time_limit_ms, + debug=debug, + auto_fix=auto_fix, ) diff --git a/packages/zapcode-ai/src/index.ts b/packages/zapcode-ai/src/index.ts index ac22cc3..0d36b9e 100644 --- a/packages/zapcode-ai/src/index.ts +++ b/packages/zapcode-ai/src/index.ts @@ -62,13 +62,48 @@ export interface ZapcodeAIOptions { timeLimitMs?: number; /** Custom adapters for additional AI SDKs. */ adapters?: ZapcodeAdapter[]; + /** + * Log generated code, tool calls, and output to the console. + * Useful for understanding what the LLM generates. + */ + debug?: boolean; + /** + * When true, execution errors are returned as tool results instead of + * throwing. The LLM sees the error and can self-correct on the next step. + * Works with `maxSteps` in the Vercel AI SDK. Default: false. + */ + autoFix?: boolean; +} + +/** A single span in the execution trace. OTel-compatible shape. */ +export interface TraceSpan { + /** Span name (e.g. "execute", "tool_call", "error", "retry"). */ + name: string; + /** When the span started (ms since epoch). */ + startTime: number; + /** When the span ended (ms since epoch). 
*/ + endTime: number; + /** Duration in ms. */ + durationMs: number; + /** "ok" or "error". */ + status: "ok" | "error"; + /** Structured attributes — keys map to OTel attribute naming. */ + attributes: Record; + /** Child spans. */ + children: TraceSpan[]; } /** Result of executing guest code. */ export interface ExecutionResult { + /** The TypeScript code that the LLM generated. */ + code: string; output: unknown; stdout: string; toolCalls: Array<{ name: string; args: unknown[]; result: unknown }>; + /** Present when autoFix is enabled and execution failed. */ + error?: string; + /** Execution trace. Present when debug or autoFix is enabled. */ + trace?: TraceSpan; } /** What `zapcode()` returns — adapters for every major AI SDK. */ @@ -106,6 +141,19 @@ export interface ZapcodeAIResult { * Access with `result.custom["my-adapter-name"]`. */ custom: Record; + + /** + * Get the full session trace tree (all attempts). + * Available when debug or autoFix is enabled. + * Call after generateText/streamText completes. + */ + getTrace: () => TraceSpan | undefined; + + /** + * Print the full session trace tree to the console. + * Available when debug or autoFix is enabled. + */ + printTrace: () => void; } // --------------------------------------------------------------------------- @@ -212,6 +260,46 @@ const CODE_TOOL_DESCRIPTION = "The code can call the available tool functions using await. 
" + "The last expression is the return value."; +// --------------------------------------------------------------------------- +// Trace helpers +// --------------------------------------------------------------------------- + +function createSpan(name: string, attributes: Record = {}): TraceSpan { + return { + name, + startTime: Date.now(), + endTime: 0, + durationMs: 0, + status: "ok", + attributes, + children: [], + }; +} + +function endSpan(span: TraceSpan, status?: "ok" | "error"): TraceSpan { + span.endTime = Date.now(); + span.durationMs = span.endTime - span.startTime; + if (status) span.status = status; + return span; +} + +function printTrace(span: TraceSpan, indent = 0): void { + const prefix = indent === 0 ? "" : "│ ".repeat(indent - 1) + "├─ "; + const icon = span.status === "error" ? "✗" : "✓"; + const duration = span.durationMs < 1 ? "<1ms" : `${span.durationMs}ms`; + const attrs = Object.entries(span.attributes) + .map(([k, v]) => { + const str = typeof v === "string" && v.length > 80 ? v.slice(0, 77) + "..." : String(v); + return `${k}=${str}`; + }) + .join(" "); + + console.log(`${prefix}${icon} ${span.name} (${duration})${attrs ? " " + attrs : ""}`); + for (const child of span.children) { + printTrace(child, indent + 1); + } +} + // --------------------------------------------------------------------------- // Execution engine // --------------------------------------------------------------------------- @@ -219,56 +307,111 @@ const CODE_TOOL_DESCRIPTION = async function executeCode( code: string, toolDefs: Record, - options: { memoryLimitMb?: number; timeLimitMs?: number } + options: { memoryLimitMb?: number; timeLimitMs?: number; debug?: boolean; autoFix?: boolean } ): Promise { const toolNames = Object.keys(toolDefs); const toolCalls: ExecutionResult["toolCalls"] = []; + const debug = options.debug ?? false; + const autoFix = options.autoFix ?? false; + const tracing = debug || autoFix; + + const execSpan = tracing ? 
createSpan("execute", { "zapcode.code": code }) : undefined; + + try { + const sandbox = new Zapcode(code, { + externalFunctions: toolNames, + timeLimitMs: options.timeLimitMs ?? 10_000, + memoryLimitMb: options.memoryLimitMb ?? 32, + }); + + let state = sandbox.start(); + let stdout = ""; + + // Snapshot/resume loop — resolve each tool call as the VM suspends + while (!state.completed) { + const { functionName, args } = state; + + const toolDef = toolDefs[functionName]; + if (!toolDef) { + throw new Error( + `Guest code called unknown function '${functionName}'. ` + + `Available: ${toolNames.join(", ")}` + ); + } + + // Build named args from positional args using the parameter schema + const paramNames = Object.keys(toolDef.parameters); + const namedArgs: Record = {}; + for (let i = 0; i < paramNames.length && i < args.length; i++) { + namedArgs[paramNames[i]] = args[i]; + } + + const toolSpan = tracing ? createSpan("tool_call", { + "zapcode.tool.name": functionName, + "zapcode.tool.args": JSON.stringify(args), + }) : undefined; + + const result = await toolDef.execute(namedArgs); + toolCalls.push({ name: functionName, args, result }); + + if (toolSpan) { + toolSpan.attributes["zapcode.tool.result"] = JSON.stringify(result); + endSpan(toolSpan); + execSpan!.children.push(toolSpan); + } + + // Resume the VM with the tool's return value + const snapshot = ZapcodeSnapshotHandle.load(state.snapshot); + state = snapshot.resume(result); + } - const sandbox = new Zapcode(code, { - externalFunctions: toolNames, - timeLimitMs: options.timeLimitMs ?? 10_000, - memoryLimitMb: options.memoryLimitMb ?? 32, - }); - - let state = sandbox.start(); - let stdout = ""; - - // Snapshot/resume loop — resolve each tool call as the VM suspends - while (!state.completed) { - const { functionName, args } = state; - - const toolDef = toolDefs[functionName]; - if (!toolDef) { - throw new Error( - `Guest code called unknown function '${functionName}'. 
` + - `Available: ${toolNames.join(", ")}` - ); + if (state.stdout) { + stdout = state.stdout; } - // Build named args from positional args using the parameter schema - const paramNames = Object.keys(toolDef.parameters); - const namedArgs: Record = {}; - for (let i = 0; i < paramNames.length && i < args.length; i++) { - namedArgs[paramNames[i]] = args[i]; + if (execSpan) { + execSpan.attributes["zapcode.output"] = JSON.stringify(state.output); + if (stdout) execSpan.attributes["zapcode.stdout"] = stdout; + endSpan(execSpan); } - const result = await toolDef.execute(namedArgs); - toolCalls.push({ name: functionName, args, result }); + if (debug && execSpan) { + printTrace(execSpan); + } - // Resume the VM with the tool's return value - const snapshot = ZapcodeSnapshotHandle.load(state.snapshot); - state = snapshot.resume(result); - } + return { + code, + output: state.output, + stdout, + toolCalls, + ...(execSpan ? { trace: execSpan } : {}), + }; + } catch (err: any) { + const errorMsg = err.message ?? String(err); - if (state.stdout) { - stdout = state.stdout; - } + if (execSpan) { + execSpan.attributes["zapcode.error"] = errorMsg; + endSpan(execSpan, "error"); + } - return { - output: state.output, - stdout, - toolCalls, - }; + if (!autoFix) { + if (debug && execSpan) printTrace(execSpan); + throw err; + } + + if (debug && execSpan) { + printTrace(execSpan); + } + + return { + code, + output: null, + stdout: "", + toolCalls, + error: `Execution failed: ${errorMsg}. Please fix your code and try again.`, + ...(execSpan ? 
{ trace: execSpan } : {}), + }; + } } // --------------------------------------------------------------------------- @@ -309,15 +452,31 @@ async function executeCode( * ``` */ export function zapcode(options: ZapcodeAIOptions): ZapcodeAIResult { - const { tools: toolDefs, system: userSystem, memoryLimitMb, timeLimitMs, adapters } = options; + const { tools: toolDefs, system: userSystem, memoryLimitMb, timeLimitMs, adapters, debug, autoFix } = options; const system = buildSystemPrompt(toolDefs, userSystem); - const execOptions = { memoryLimitMb, timeLimitMs }; + const execOptions = { memoryLimitMb, timeLimitMs, debug: false, autoFix }; + const tracing = debug || autoFix; + + // Session-level trace collects all attempts + const sessionTrace: TraceSpan | undefined = tracing + ? createSpan("session", { "zapcode.tools": Object.keys(toolDefs).join(", ") }) + : undefined; + let attemptCount = 0; // Universal handler const handleToolCall = async (code: string): Promise => { - return executeCode(code, toolDefs, execOptions); + attemptCount++; + const result = await executeCode(code, toolDefs, execOptions); + + if (sessionTrace && result.trace) { + result.trace.name = `attempt_${attemptCount}`; + result.trace.attributes["zapcode.attempt"] = attemptCount; + sessionTrace.children.push(result.trace); + } + + return result; }; // Vercel AI SDK format — use tool() + jsonSchema() for proper integration @@ -365,7 +524,22 @@ export function zapcode(options: ZapcodeAIOptions): ZapcodeAIResult { } } - return { system, tools, openaiTools, anthropicTools, handleToolCall, custom }; + const getTrace = (): TraceSpan | undefined => { + if (!sessionTrace) return undefined; + endSpan(sessionTrace, sessionTrace.children.some(c => c.status === "ok") ? 
"ok" : "error"); + return sessionTrace; + }; + + const printSessionTrace = (): void => { + const trace = getTrace(); + if (trace) { + console.log(`\n─── Zapcode Trace ───`); + printTrace(trace); + console.log(`─────────────────────\n`); + } + }; + + return { system, tools, openaiTools, anthropicTools, handleToolCall, custom, getTrace, printTrace: printSessionTrace }; } // --------------------------------------------------------------------------- @@ -483,7 +657,7 @@ export function createAdapter( export async function execute( code: string, tools: Record, - options?: { memoryLimitMb?: number; timeLimitMs?: number } + options?: { memoryLimitMb?: number; timeLimitMs?: number; debug?: boolean; autoFix?: boolean } ): Promise { return executeCode(code, tools, options ?? {}); } From a65b8e513415f72923af1e76a28a030bf2db8f53 Mon Sep 17 00:00:00 2001 From: Alexandre MAI Date: Thu, 12 Mar 2026 18:41:30 +0100 Subject: [PATCH 05/10] refactor: reorganize examples by language and topic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restructures examples/ from a flat layout to language-first, topic-second (e.g. examples/typescript/debug-tracing/). Each example is now a self-contained project with its own package.json/pyproject.toml, making it easy to cd in and run without affecting other examples. Also adds debug-tracing examples (TypeScript + Python) that demonstrate autoFix, step-by-step logging of generated code and tool calls, and execution trace printing — serving as the reference for developers who want full observability into their agent's code execution. 
--- examples/README.md | 39 + examples/ai-bedrock/README.md | 33 - examples/python/README.md | 56 -- examples/python/ai-agent/README.md | 28 + .../{ => ai-agent}/ai_agent_anthropic.py | 0 .../{ => ai-agent}/ai_agent_zapcode_ai.py | 0 examples/python/{ => ai-agent}/pyproject.toml | 8 +- examples/python/ai-bedrock/README.md | 19 + examples/{ => python}/ai-bedrock/main.py | 0 .../{ => python}/ai-bedrock/pyproject.toml | 0 examples/python/basic/README.md | 16 + examples/python/{basic.py => basic/main.py} | 4 +- examples/python/basic/pyproject.toml | 8 + examples/python/debug-tracing/README.md | 25 + examples/python/debug-tracing/main.py | 200 +++++ examples/python/debug-tracing/pyproject.toml | 9 + examples/rust/README.md | 19 - examples/rust/basic/Cargo.lock | 704 ++++++++++++++++++ examples/rust/{ => basic}/Cargo.toml | 2 +- examples/rust/basic/README.md | 9 + examples/rust/{ => basic}/basic.rs | 0 examples/typescript/README.md | 48 -- examples/typescript/ai-agent/README.md | 31 + .../{ => ai-agent}/ai-agent-anthropic.ts | 4 +- .../{ => ai-agent}/ai-agent-vercel-ai.ts | 4 +- .../{ => ai-agent}/ai-agent-zapcode-ai.ts | 4 +- examples/typescript/ai-agent/package.json | 21 + examples/typescript/ai-bedrock/README.md | 18 + examples/{ => typescript}/ai-bedrock/main.ts | 0 .../{ => typescript}/ai-bedrock/package.json | 4 +- examples/typescript/basic/README.md | 15 + .../typescript/{basic.ts => basic/main.ts} | 4 +- examples/typescript/basic/package.json | 15 + examples/typescript/debug-tracing/README.md | 52 ++ examples/typescript/debug-tracing/main.ts | 139 ++++ .../typescript/debug-tracing/package.json | 19 + examples/typescript/package.json | 30 - examples/typescript/tsconfig.json | 11 - examples/wasm/{ => basic}/index.html | 0 39 files changed, 1382 insertions(+), 216 deletions(-) create mode 100644 examples/README.md delete mode 100644 examples/ai-bedrock/README.md delete mode 100644 examples/python/README.md create mode 100644 examples/python/ai-agent/README.md rename 
examples/python/{ => ai-agent}/ai_agent_anthropic.py (100%) rename examples/python/{ => ai-agent}/ai_agent_zapcode_ai.py (100%) rename examples/python/{ => ai-agent}/pyproject.toml (51%) create mode 100644 examples/python/ai-bedrock/README.md rename examples/{ => python}/ai-bedrock/main.py (100%) rename examples/{ => python}/ai-bedrock/pyproject.toml (100%) create mode 100644 examples/python/basic/README.md rename examples/python/{basic.py => basic/main.py} (96%) create mode 100644 examples/python/basic/pyproject.toml create mode 100644 examples/python/debug-tracing/README.md create mode 100644 examples/python/debug-tracing/main.py create mode 100644 examples/python/debug-tracing/pyproject.toml delete mode 100644 examples/rust/README.md create mode 100644 examples/rust/basic/Cargo.lock rename examples/rust/{ => basic}/Cargo.toml (74%) create mode 100644 examples/rust/basic/README.md rename examples/rust/{ => basic}/basic.rs (100%) delete mode 100644 examples/typescript/README.md create mode 100644 examples/typescript/ai-agent/README.md rename examples/typescript/{ => ai-agent}/ai-agent-anthropic.ts (97%) rename examples/typescript/{ => ai-agent}/ai-agent-vercel-ai.ts (96%) rename examples/typescript/{ => ai-agent}/ai-agent-zapcode-ai.ts (96%) create mode 100644 examples/typescript/ai-agent/package.json create mode 100644 examples/typescript/ai-bedrock/README.md rename examples/{ => typescript}/ai-bedrock/main.ts (100%) rename examples/{ => typescript}/ai-bedrock/package.json (72%) create mode 100644 examples/typescript/basic/README.md rename examples/typescript/{basic.ts => basic/main.ts} (96%) create mode 100644 examples/typescript/basic/package.json create mode 100644 examples/typescript/debug-tracing/README.md create mode 100644 examples/typescript/debug-tracing/main.ts create mode 100644 examples/typescript/debug-tracing/package.json delete mode 100644 examples/typescript/package.json delete mode 100644 examples/typescript/tsconfig.json rename examples/wasm/{ 
=> basic}/index.html (100%) diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..fe7374a --- /dev/null +++ b/examples/README.md @@ -0,0 +1,39 @@ +# Examples + +Examples organized by language, then by topic. + +``` +examples/ +├── typescript/ +│ ├── basic/ Simple expressions, inputs, snapshot/resume, classes +│ ├── ai-agent/ AI agent with Anthropic SDK, Vercel AI SDK, zapcode-ai +│ ├── ai-bedrock/ AWS Bedrock integration +│ └── debug-tracing/ Debug mode, autoFix, execution tracing +├── python/ +│ ├── basic/ Simple expressions, inputs, snapshot/resume +│ ├── ai-agent/ AI agent with Anthropic SDK, zapcode-ai +│ ├── ai-bedrock/ AWS Bedrock Converse API +│ └── debug-tracing/ Debug mode, autoFix, execution tracing +├── rust/ +│ └── basic/ Simple expressions, inputs, snapshot/resume +└── wasm/ + └── basic/ Browser playground (single HTML file) +``` + +## Quick start + +Each example has its own `README.md` with setup and run instructions. Pick a language and topic: + +```bash +# TypeScript — basic usage (no API key needed) +cd typescript/basic && npm install && npm start + +# Python — basic usage (no API key needed) +cd python/basic && pip install zapcode && python main.py + +# Rust — basic usage +cd rust/basic && cargo run --example basic + +# WASM — open in browser +open wasm/basic/index.html +``` diff --git a/examples/ai-bedrock/README.md b/examples/ai-bedrock/README.md deleted file mode 100644 index aa53d79..0000000 --- a/examples/ai-bedrock/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# Zapcode + AWS Bedrock Example - -End-to-end example using Zapcode with the Vercel AI SDK and AWS Bedrock. 
- -## Prerequisites - -- AWS credentials configured (`~/.aws/credentials`, env vars, or IAM role) -- Access to a Bedrock model (default: `moonshotai.kimi-k2.5` in `eu-west-2`) - -## TypeScript - -```bash -npm install -npm start -``` - -Override model/region: -```bash -MODEL_ID=eu.anthropic.claude-sonnet-4-20250514-v1:0 AWS_REGION=eu-west-1 npm start -``` - -## Python - -```bash -uv venv .venv && source .venv/bin/activate -uv pip install zapcode-ai boto3 -python main.py -``` - -Override model/region: -```bash -MODEL_ID=eu.anthropic.claude-sonnet-4-20250514-v1:0 AWS_REGION=eu-west-1 python main.py -``` diff --git a/examples/python/README.md b/examples/python/README.md deleted file mode 100644 index d3cc89f..0000000 --- a/examples/python/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# Python Examples - -## Setup - -### Prerequisites - -- [Rust toolchain](https://rustup.rs/) (for building the native module) -- Python 3.10+ (recommended: [pyenv](https://github.com/pyenv/pyenv)) -- [uv](https://docs.astral.sh/uv/) or pip - -### Create a virtualenv (recommended) - -```bash -pyenv virtualenv 3.13.8 zapcode -pyenv local zapcode -``` - -### Build the native module - -```bash -# Install maturin -uv pip install maturin - -# Build and install zapcode -cd ../../crates/zapcode-py -maturin develop --release -``` - -### With uv (alternative) - -```bash -uv sync # install dependencies + build zapcode from source -uv sync --extra ai # also install anthropic SDK for the AI agent example -``` - -## Run - -```bash -# Basic usage (no API key needed) -python basic.py - -# AI agent with zapcode-ai wrapper (requires ANTHROPIC_API_KEY) -export ANTHROPIC_API_KEY=sk-ant-... 
-python ai_agent_zapcode_ai.py - -# AI agent with raw Anthropic SDK -python ai_agent_anthropic.py -``` - -## What's here - -| File | Description | -|---|---| -| `basic.py` | Simple expressions, inputs, data processing, snapshot/resume, serialization | -| `ai_agent_zapcode_ai.py` | **Recommended** — uses `zapcode-ai` wrapper with Anthropic SDK | -| `ai_agent_anthropic.py` | Raw Anthropic SDK + manual snapshot/resume loop | diff --git a/examples/python/ai-agent/README.md b/examples/python/ai-agent/README.md new file mode 100644 index 0000000..8053f48 --- /dev/null +++ b/examples/python/ai-agent/README.md @@ -0,0 +1,28 @@ +# AI Agent Examples (Python) + +Two ways to build AI agents with Zapcode in Python. + +## Setup + +```bash +pip install zapcode zapcode-ai anthropic +# or: uv pip install zapcode zapcode-ai anthropic +export ANTHROPIC_API_KEY=sk-ant-... +``` + +## Run + +```bash +# Recommended — zapcode-ai wrapper +python ai_agent_zapcode_ai.py + +# Raw Anthropic SDK + manual snapshot/resume loop +python ai_agent_anthropic.py +``` + +## What's here + +| File | Description | +|---|---| +| `ai_agent_zapcode_ai.py` | **Recommended** — uses `zapcode-ai` wrapper with Anthropic SDK | +| `ai_agent_anthropic.py` | Raw Anthropic SDK + manual snapshot/resume loop | diff --git a/examples/python/ai_agent_anthropic.py b/examples/python/ai-agent/ai_agent_anthropic.py similarity index 100% rename from examples/python/ai_agent_anthropic.py rename to examples/python/ai-agent/ai_agent_anthropic.py diff --git a/examples/python/ai_agent_zapcode_ai.py b/examples/python/ai-agent/ai_agent_zapcode_ai.py similarity index 100% rename from examples/python/ai_agent_zapcode_ai.py rename to examples/python/ai-agent/ai_agent_zapcode_ai.py diff --git a/examples/python/pyproject.toml b/examples/python/ai-agent/pyproject.toml similarity index 51% rename from examples/python/pyproject.toml rename to examples/python/ai-agent/pyproject.toml index 0e1f335..4065651 100644 --- 
a/examples/python/pyproject.toml +++ b/examples/python/ai-agent/pyproject.toml @@ -1,14 +1,10 @@ [project] -name = "zapcode-examples" +name = "zapcode-ai-agent-example" version = "0.0.1" -description = "Example usage of Zapcode with AI agents" +description = "Zapcode AI agent examples (Python)" requires-python = ">=3.10" dependencies = [ "zapcode", "zapcode-ai", -] - -[project.optional-dependencies] -ai = [ "anthropic>=0.39.0", ] diff --git a/examples/python/ai-bedrock/README.md b/examples/python/ai-bedrock/README.md new file mode 100644 index 0000000..82a5835 --- /dev/null +++ b/examples/python/ai-bedrock/README.md @@ -0,0 +1,19 @@ +# AWS Bedrock Example (Python) + +Zapcode + AWS Bedrock Converse API. + +## Setup + +```bash +pip install zapcode-ai boto3 +# or: uv pip install zapcode-ai boto3 +``` + +## Run + +```bash +python main.py + +# Override model/region +MODEL_ID=eu.anthropic.claude-sonnet-4-20250514-v1:0 AWS_REGION=eu-west-1 python main.py +``` diff --git a/examples/ai-bedrock/main.py b/examples/python/ai-bedrock/main.py similarity index 100% rename from examples/ai-bedrock/main.py rename to examples/python/ai-bedrock/main.py diff --git a/examples/ai-bedrock/pyproject.toml b/examples/python/ai-bedrock/pyproject.toml similarity index 100% rename from examples/ai-bedrock/pyproject.toml rename to examples/python/ai-bedrock/pyproject.toml diff --git a/examples/python/basic/README.md b/examples/python/basic/README.md new file mode 100644 index 0000000..8831bf0 --- /dev/null +++ b/examples/python/basic/README.md @@ -0,0 +1,16 @@ +# Basic Python Example + +Simple expressions, inputs, data processing, snapshot/resume, and serialization. 
+ +## Setup + +```bash +pip install zapcode +# or: uv pip install zapcode +``` + +## Run + +```bash +python main.py +``` diff --git a/examples/python/basic.py b/examples/python/basic/main.py similarity index 96% rename from examples/python/basic.py rename to examples/python/basic/main.py index 5198ae9..d1a2f44 100644 --- a/examples/python/basic.py +++ b/examples/python/basic/main.py @@ -1,8 +1,8 @@ """ Basic Zapcode example — execute TypeScript from Python. -Prerequisites: build zapcode-py (see README) -Run with: python examples/python/basic.py +Prerequisites: pip install zapcode +Run with: python main.py """ from zapcode import Zapcode, ZapcodeSnapshot diff --git a/examples/python/basic/pyproject.toml b/examples/python/basic/pyproject.toml new file mode 100644 index 0000000..f1b718f --- /dev/null +++ b/examples/python/basic/pyproject.toml @@ -0,0 +1,8 @@ +[project] +name = "zapcode-basic-example" +version = "0.0.1" +description = "Basic Zapcode usage from Python" +requires-python = ">=3.10" +dependencies = [ + "zapcode", +] diff --git a/examples/python/debug-tracing/README.md b/examples/python/debug-tracing/README.md new file mode 100644 index 0000000..6766d04 --- /dev/null +++ b/examples/python/debug-tracing/README.md @@ -0,0 +1,25 @@ +# Debug & Tracing Example (Python) + +Demonstrates Zapcode's debug mode, auto-fix error recovery, and execution tracing. 
+ +## Features + +- **`debug=True`** — Prints the LLM-generated code, external tool calls, and output for each execution +- **`auto_fix=True`** — When the LLM generates code that fails, the error is returned as a tool result instead of raising, letting the LLM self-correct on the next step +- **`print_trace()`** — Displays the full execution trace tree (parse -> compile -> execute) with timing + +## Setup + +```bash +pip install zapcode-ai boto3 +# or: uv pip install zapcode-ai boto3 +``` + +## Run + +```bash +python main.py + +# With a specific model +MODEL_ID=eu.anthropic.claude-sonnet-4-20250514-v1:0 python main.py +``` diff --git a/examples/python/debug-tracing/main.py b/examples/python/debug-tracing/main.py new file mode 100644 index 0000000..0c48b99 --- /dev/null +++ b/examples/python/debug-tracing/main.py @@ -0,0 +1,200 @@ +""" +Zapcode debug & tracing example (Python). + +Demonstrates: + - Logging LLM-generated code, tool calls, and output + - auto_fix=True — catches execution errors and feeds them back to the LLM + - print_trace() — displays the execution trace tree with timing + +Prerequisites: + pip install zapcode-ai boto3 + AWS credentials configured (env vars, ~/.aws/credentials, or IAM role) + +Run: python main.py +""" + +import json +import os +import time + +import boto3 +from zapcode_ai import zapcode, ToolDefinition, ParamDef + + +# --- Bedrock setup --- +REGION = os.environ.get("AWS_REGION", "eu-west-1") +MODEL_ID = os.environ.get("MODEL_ID", "global.amazon.nova-2-lite-v1:0") + +bedrock = boto3.client("bedrock-runtime", region_name=REGION) + + +# --- Tools --- +def get_weather(args): + data = { + "London": {"condition": "Overcast", "temp": 12}, + "Tokyo": {"condition": "Clear", "temp": 26}, + "Paris": {"condition": "Sunny", "temp": 22}, + "New York": {"condition": "Rain", "temp": 14}, + } + return data.get(args["city"], {"condition": "Unknown", "temp": 0}) + + +def search_flights(args): + origin = args["from"] + destination = args["to"] + return [ + 
{"from": origin, "to": destination, "airline": "BA", "flight": "BA123", "price": 450, "departure": "08:00"}, + {"from": origin, "to": destination, "airline": "AF", "flight": "AF456", "price": 380, "departure": "14:30"}, + ] + + +# --- Zapcode setup with auto_fix --- +zap = zapcode( + auto_fix=True, + system="You are a helpful assistant that can look up weather and do math.", + tools={ + "getWeather": ToolDefinition( + description="Get current weather for a city. Returns { condition: string, temp: number }", + parameters={"city": ParamDef(type="string", description="City name")}, + execute=get_weather, + ), + "searchFlights": ToolDefinition( + description="Search flights between two cities. Returns Array<{ from, to, airline, flight, price, departure }>", + parameters={ + "from": ParamDef(type="string", description="Departure city"), + "to": ParamDef(type="string", description="Arrival city"), + }, + execute=search_flights, + ), + }, +) + + +# --- Debug: log generated code, tool calls, and output --- +def log_execution(result): + # Print the generated code + indented = "\n".join(" " + line for line in result.code.split("\n")) + print(f"\n[zapcode] Code:\n{indented}") + + # Print each tool call + for tc in result.tool_calls: + args_str = ", ".join(json.dumps(a, default=str) for a in tc["args"]) + print(f"[zapcode] Tool call: {tc['name']}({args_str}) → {json.dumps(tc['result'], default=str)}") + + # Print output or error + if result.error: + print(f"[zapcode] Error: {result.error}") + else: + print(f"[zapcode] Output: {json.dumps(result.output, default=str)}") + + +def main(): + print(f"Model: {MODEL_ID} | Region: {REGION}") + print(f"Debug: ON | AutoFix: ON") + + t0 = time.perf_counter() + + messages = [ + {"role": "user", "content": [{"text": "What's the weather in Tokyo and Paris? 
Find flights from the colder city to the warmer one."}]} + ] + + tool_config = { + "tools": [ + { + "toolSpec": { + "name": "execute_code", + "description": "Execute TypeScript code in a secure sandbox. The code can call the available tool functions using await. The last expression is the return value.", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "TypeScript code to execute in the sandbox", + } + }, + "required": ["code"], + } + }, + } + } + ] + } + + max_steps = 10 + steps = 0 + total_tokens = 0 + + while steps < max_steps: + steps += 1 + response = bedrock.converse( + modelId=MODEL_ID, + messages=messages, + system=[{"text": zap.system}], + toolConfig=tool_config, + ) + + total_tokens += response["usage"]["inputTokens"] + response["usage"]["outputTokens"] + stop_reason = response["stopReason"] + + if stop_reason == "tool_use": + assistant_content = response["output"]["message"]["content"] + messages.append({"role": "assistant", "content": assistant_content}) + + tool_results = [] + for block in assistant_content: + if "toolUse" in block: + tool_use = block["toolUse"] + code = tool_use["input"]["code"] + result = zap.handle_tool_call(code) + + # Debug: log the execution + log_execution(result) + + if result.error: + tool_results.append({ + "toolResult": { + "toolUseId": tool_use["toolUseId"], + "content": [{"text": result.error}], + "status": "error", + } + }) + else: + tool_results.append({ + "toolResult": { + "toolUseId": tool_use["toolUseId"], + "content": [{"json": {"output": result.output, "stdout": result.stdout}}], + } + }) + + messages.append({"role": "user", "content": tool_results}) + elif stop_reason in ("end_turn", "stop_sequence"): + text = "" + for block in response["output"]["message"]["content"]: + if "text" in block: + text += block["text"] + + total_ms = (time.perf_counter() - t0) * 1000 + + print(f"\nAnswer: {text}") + print("\n--- Timing ---") + print(f"Total (LLM + 
Zapcode): {total_ms:.0f}ms") + print(f"Steps: {steps}") + print(f"Tokens: {total_tokens}") + + # Print the full execution trace tree + print("\n--- Execution Trace ---") + zap.print_trace() + return + else: + raise RuntimeError( + f"Bedrock Converse returned unexpected stop reason: {stop_reason}" + ) + + raise RuntimeError( + f"Model did not produce a final answer within {max_steps} steps" + ) + + +if __name__ == "__main__": + main() diff --git a/examples/python/debug-tracing/pyproject.toml b/examples/python/debug-tracing/pyproject.toml new file mode 100644 index 0000000..4411220 --- /dev/null +++ b/examples/python/debug-tracing/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "zapcode-debug-tracing-example" +version = "0.0.1" +description = "Zapcode debug & tracing example (Python)" +requires-python = ">=3.10" +dependencies = [ + "zapcode-ai", + "boto3", +] diff --git a/examples/rust/README.md b/examples/rust/README.md deleted file mode 100644 index 34aea19..0000000 --- a/examples/rust/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# Rust Examples - -## Prerequisites - -- [Rust toolchain](https://rustup.rs/) - -## Run - -```bash -cargo run --example basic -``` - -> **Note:** The examples crate is excluded from the workspace. It has its own `Cargo.toml` that depends on `zapcode-core` via path. - -## What's here - -| File | Description | -|---|---| -| `basic.rs` | Simple expressions, inputs, external functions (snapshot/resume), snapshot serialization | diff --git a/examples/rust/basic/Cargo.lock b/examples/rust/basic/Cargo.lock new file mode 100644 index 0000000..89a2364 --- /dev/null +++ b/examples/rust/basic/Cargo.lock @@ -0,0 +1,704 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "atomic-polyfill" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4" +dependencies = [ + "critical-section", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "castaway" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" +dependencies = [ + "rustversion", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cobs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" +dependencies = [ + "thiserror", +] + +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + 
"ryu", + "static_assertions", +] + +[[package]] +name = "cow-utils" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "417bef24afe1460300965a25ff4a24b8b45ad011948302ec221e8a0a81eb2c79" + +[[package]] +name = "critical-section" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" + +[[package]] +name = "dragonbox_ecma" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd8e701084c37e7ef62d3f9e453b618130cbc0ef3573847785952a3ac3f746bf" + +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "hash32" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67" +dependencies = [ + "byteorder", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", +] + +[[package]] +name = "heapless" +version = "0.7.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f" +dependencies = [ + "atomic-polyfill", + "hash32", + "rustc_version", + "serde", + "spin", + "stable_deref_trait", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", + "serde", + "serde_core", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "nonmax" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "610a5acd306ec67f907abe5567859a3c693fb9886eb1f012ab8f2a47bef3db51" + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "owo-colors" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d211803b9b6b570f68772237e415a029d5a50c65d382910b879fb19d3271f94d" + +[[package]] +name = "oxc-miette" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a7ba54c704edefead1f44e9ef09c43e5cfae666bdc33516b066011f0e6ebf7" +dependencies = [ + "cfg-if", + "owo-colors", + "oxc-miette-derive", + "textwrap", + "thiserror", + "unicode-segmentation", + "unicode-width", +] + +[[package]] +name = "oxc-miette-derive" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4faecb54d0971f948fbc1918df69b26007e6f279a204793669542e1e8b75eb3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "oxc_allocator" +version = "0.117.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b44277218c002c09167474648a478d3d29a29095ef8950ec9f1fac016c62d7" +dependencies = [ + "allocator-api2", + "hashbrown", + "oxc_data_structures", + "rustc-hash", +] + +[[package]] +name = "oxc_ast" +version = "0.117.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4222e4e7a1ab01b2a20420a5a65798377a748ea37ee7ece4d7a6b733f86eb61" +dependencies = [ + "bitflags", + "oxc_allocator", + "oxc_ast_macros", + "oxc_data_structures", + "oxc_diagnostics", + "oxc_estree", + "oxc_regular_expression", + "oxc_span", + "oxc_syntax", +] + +[[package]] +name = "oxc_ast_macros" +version = "0.117.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e65a38ae589e284dd45a85008024f04aa680e9ddf1321c163cf7f187c805e91" +dependencies = [ + "phf", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "oxc_data_structures" +version = "0.117.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f53bed71cad192596aee8f87f6d6bc2a38a4f898255a69b1d41da1968b9b2c6f" + +[[package]] +name = "oxc_diagnostics" +version = "0.117.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a2d2491c0a1ea29a83abe645424f85c64b5c825f60e5304a453e4314a8b6d88" +dependencies = [ + "cow-utils", + "oxc-miette", + "percent-encoding", +] + +[[package]] +name = "oxc_ecmascript" +version = "0.117.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71b23b64fa8c4a84b1406de383c4666366c9f54ffb9cb11a63b8d7433950460a" +dependencies = [ + "cow-utils", + "num-bigint", + "num-traits", + "oxc_allocator", + "oxc_ast", + "oxc_regular_expression", + "oxc_span", + "oxc_syntax", +] + +[[package]] +name = "oxc_estree" +version = "0.117.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47515ead44bc8beec1ae1514f10ecca63cde043da167c0395dc914f098ea5d2" + +[[package]] +name = "oxc_index" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3e6120999627ec9703025eab7c9f410ebb7e95557632a8902ca48210416c2b" +dependencies = [ + "nonmax", + "serde", +] + +[[package]] +name = "oxc_parser" +version = "0.117.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3278d4f34d01cdaf85a2391d7b12daba1d95c20c1ff2ac9316d3c28f36353e4e" +dependencies = [ + "bitflags", + "cow-utils", + "memchr", + "num-bigint", + "num-traits", + "oxc_allocator", + "oxc_ast", + "oxc_data_structures", + "oxc_diagnostics", + "oxc_ecmascript", + "oxc_regular_expression", + "oxc_span", + "oxc_syntax", + "rustc-hash", + "seq-macro", +] + +[[package]] +name = "oxc_regular_expression" +version = "0.117.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3d680252672b22c24abbaf6e401eace0be9f53072a03411936204625ff349d0" +dependencies = [ + "bitflags", + "oxc_allocator", + "oxc_ast_macros", + 
"oxc_diagnostics", + "oxc_span", + "phf", + "rustc-hash", + "unicode-id-start", +] + +[[package]] +name = "oxc_span" +version = "0.117.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6eb1bd62de89fb0c646bfb053b72370750fab43a84ebe09ad97cfa020712314" +dependencies = [ + "compact_str", + "oxc-miette", + "oxc_allocator", + "oxc_ast_macros", + "oxc_estree", + "oxc_str", +] + +[[package]] +name = "oxc_str" +version = "0.117.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e65cbfb06ecbae07e0da931815b6b03ade886d016302c400bda7dc0a2f600d3" +dependencies = [ + "compact_str", + "hashbrown", + "oxc_allocator", + "oxc_estree", +] + +[[package]] +name = "oxc_syntax" +version = "0.117.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0f1617f0aa890517fb61ffa1d2d73a8497aca52e84ef6f027fad1e93250eccc" +dependencies = [ + "bitflags", + "cow-utils", + "dragonbox_ecma", + "nonmax", + "oxc_allocator", + "oxc_ast_macros", + "oxc_estree", + "oxc_index", + "oxc_span", + "phf", + "unicode-id-start", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared", + "serde", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + +[[package]] +name = "postcard" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "heapless", + "serde", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + +[[package]] +name = "smawk" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "textwrap" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" +dependencies = [ + "smawk", + "unicode-linebreak", + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-id-start" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81b79ad29b5e19de4260020f8919b443b2ef0277d242ce532ec7b7a2cc8b6007" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = 
"unicode-linebreak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "zapcode-core" +version = "1.0.1" +dependencies = [ + "indexmap", + "oxc_allocator", + "oxc_ast", + "oxc_parser", + "oxc_span", + "oxc_syntax", + "postcard", + "serde", + "thiserror", +] + +[[package]] +name = "zapcode-examples" +version = "0.0.1" +dependencies = [ + "indexmap", + "zapcode-core", +] diff --git a/examples/rust/Cargo.toml b/examples/rust/basic/Cargo.toml similarity index 74% rename from examples/rust/Cargo.toml rename to examples/rust/basic/Cargo.toml index 5e7424f..f22d9a0 100644 --- a/examples/rust/Cargo.toml +++ b/examples/rust/basic/Cargo.toml @@ -9,5 +9,5 @@ name = "basic" path = "basic.rs" [dependencies] -zapcode-core = { path = "../../crates/zapcode-core" } +zapcode-core = { path = "../../../crates/zapcode-core" } indexmap = "2" diff --git a/examples/rust/basic/README.md b/examples/rust/basic/README.md new file mode 100644 index 0000000..25ad15c --- /dev/null +++ b/examples/rust/basic/README.md @@ -0,0 +1,9 @@ +# Basic Rust Example + +Simple expressions, inputs, external functions (snapshot/resume), and snapshot serialization. 
+ +## Run + +```bash +cargo run --example basic +``` diff --git a/examples/rust/basic.rs b/examples/rust/basic/basic.rs similarity index 100% rename from examples/rust/basic.rs rename to examples/rust/basic/basic.rs diff --git a/examples/typescript/README.md b/examples/typescript/README.md deleted file mode 100644 index c788686..0000000 --- a/examples/typescript/README.md +++ /dev/null @@ -1,48 +0,0 @@ -# TypeScript Examples - -## Setup - -### Prerequisites - -- [Rust toolchain](https://rustup.rs/) (for building the native addon) -- [Node.js](https://nodejs.org/) (v18+) - -### Build the native addon - -```bash -cd ../../crates/zapcode-js -npm install @napi-rs/cli --save-dev -npx napi build --release --platform --js index.js --dts index.d.ts -``` - -### Install dependencies - -```bash -npm install -``` - -## Run - -```bash -# Basic usage (no API key needed) -npm run basic - -# AI agent with @unchartedfr/zapcode-ai wrapper (requires ANTHROPIC_API_KEY) -export ANTHROPIC_API_KEY=sk-ant-... -npm run agent - -# AI agent with raw Anthropic SDK -npm run agent:anthropic - -# AI agent with Vercel AI SDK -npm run agent:vercel -``` - -## What's here - -| File | Description | -|---|---| -| `basic.ts` | Simple expressions, inputs, data processing, classes, resource limits | -| `ai-agent-zapcode-ai.ts` | **Recommended** — uses `@unchartedfr/zapcode-ai` wrapper with Vercel AI SDK | -| `ai-agent-anthropic.ts` | Raw Anthropic SDK + manual snapshot/resume loop | -| `ai-agent-vercel-ai.ts` | Vercel AI SDK with manual code generation | diff --git a/examples/typescript/ai-agent/README.md b/examples/typescript/ai-agent/README.md new file mode 100644 index 0000000..e63d077 --- /dev/null +++ b/examples/typescript/ai-agent/README.md @@ -0,0 +1,31 @@ +# AI Agent Examples (TypeScript) + +Three ways to build AI agents with Zapcode, from high-level to low-level. + +## Setup + +```bash +npm install +export ANTHROPIC_API_KEY=sk-ant-... 
+``` + +## Run + +```bash +# Recommended — zapcode-ai wrapper +npm run agent + +# Vercel AI SDK with streamText +npm run agent:vercel + +# Raw Anthropic SDK + manual snapshot/resume loop +npm run agent:anthropic +``` + +## What's here + +| File | Description | +|---|---| +| `ai-agent-zapcode-ai.ts` | **Recommended** — uses `@unchartedfr/zapcode-ai` wrapper with Vercel AI SDK | +| `ai-agent-vercel-ai.ts` | Vercel AI SDK with `generateText` and `streamText` | +| `ai-agent-anthropic.ts` | Raw Anthropic SDK + manual snapshot/resume loop | diff --git a/examples/typescript/ai-agent-anthropic.ts b/examples/typescript/ai-agent/ai-agent-anthropic.ts similarity index 97% rename from examples/typescript/ai-agent-anthropic.ts rename to examples/typescript/ai-agent/ai-agent-anthropic.ts index 055a425..6114ad4 100644 --- a/examples/typescript/ai-agent-anthropic.ts +++ b/examples/typescript/ai-agent/ai-agent-anthropic.ts @@ -12,10 +12,10 @@ * 4. Your app resolves the tool call, then resumes Zapcode with the result * * Prerequisites: - * npm install @anthropic-ai/sdk @unchartedfr/zapcode + * npm install * export ANTHROPIC_API_KEY=sk-... * - * Run with: npx tsx ai-agent-anthropic.ts + * Run with: npm run agent:anthropic */ import Anthropic from "@anthropic-ai/sdk"; diff --git a/examples/typescript/ai-agent-vercel-ai.ts b/examples/typescript/ai-agent/ai-agent-vercel-ai.ts similarity index 96% rename from examples/typescript/ai-agent-vercel-ai.ts rename to examples/typescript/ai-agent/ai-agent-vercel-ai.ts index b62d14e..7e4ce0c 100644 --- a/examples/typescript/ai-agent-vercel-ai.ts +++ b/examples/typescript/ai-agent/ai-agent-vercel-ai.ts @@ -8,10 +8,10 @@ * Works with any AI SDK provider: Anthropic, OpenAI, Google, etc. * * Prerequisites: - * npm install @unchartedfr/zapcode-ai ai @ai-sdk/anthropic + * npm install * export ANTHROPIC_API_KEY=sk-... 
* - * Run with: npx tsx ai-agent-vercel-ai.ts + * Run with: npm run agent:vercel */ import { zapcode } from "@unchartedfr/zapcode-ai"; diff --git a/examples/typescript/ai-agent-zapcode-ai.ts b/examples/typescript/ai-agent/ai-agent-zapcode-ai.ts similarity index 96% rename from examples/typescript/ai-agent-zapcode-ai.ts rename to examples/typescript/ai-agent/ai-agent-zapcode-ai.ts index 2d193e2..c587205 100644 --- a/examples/typescript/ai-agent-zapcode-ai.ts +++ b/examples/typescript/ai-agent/ai-agent-zapcode-ai.ts @@ -6,10 +6,10 @@ * directly into Vercel AI SDK's `generateText` / `streamText`. * * Prerequisites: - * npm install @unchartedfr/zapcode-ai ai @ai-sdk/anthropic + * npm install * export ANTHROPIC_API_KEY=sk-... * - * Run with: npx tsx ai-agent-zapcode-ai.ts + * Run with: npm run agent */ import { zapcode } from "@unchartedfr/zapcode-ai"; diff --git a/examples/typescript/ai-agent/package.json b/examples/typescript/ai-agent/package.json new file mode 100644 index 0000000..9d9fc01 --- /dev/null +++ b/examples/typescript/ai-agent/package.json @@ -0,0 +1,21 @@ +{ + "name": "zapcode-ai-agent-example", + "private": true, + "type": "module", + "scripts": { + "agent": "npx tsx ai-agent-zapcode-ai.ts", + "agent:anthropic": "npx tsx ai-agent-anthropic.ts", + "agent:vercel": "npx tsx ai-agent-vercel-ai.ts" + }, + "dependencies": { + "@unchartedfr/zapcode": "file:../../../crates/zapcode-js", + "@unchartedfr/zapcode-ai": "file:../../../packages/zapcode-ai", + "@anthropic-ai/sdk": "^0.39.0", + "@ai-sdk/anthropic": "^1.1.0", + "ai": "^4.1.0" + }, + "devDependencies": { + "tsx": "^4.0.0", + "typescript": "^5.0.0" + } +} diff --git a/examples/typescript/ai-bedrock/README.md b/examples/typescript/ai-bedrock/README.md new file mode 100644 index 0000000..1cee546 --- /dev/null +++ b/examples/typescript/ai-bedrock/README.md @@ -0,0 +1,18 @@ +# AWS Bedrock Example (TypeScript) + +Zapcode + Vercel AI SDK + AWS Bedrock. 
+ +## Setup + +```bash +npm install +``` + +## Run + +```bash +npm start + +# Override model/region +MODEL_ID=eu.anthropic.claude-sonnet-4-20250514-v1:0 AWS_REGION=eu-west-1 npm start +``` diff --git a/examples/ai-bedrock/main.ts b/examples/typescript/ai-bedrock/main.ts similarity index 100% rename from examples/ai-bedrock/main.ts rename to examples/typescript/ai-bedrock/main.ts diff --git a/examples/ai-bedrock/package.json b/examples/typescript/ai-bedrock/package.json similarity index 72% rename from examples/ai-bedrock/package.json rename to examples/typescript/ai-bedrock/package.json index 185cb03..c7af8f9 100644 --- a/examples/ai-bedrock/package.json +++ b/examples/typescript/ai-bedrock/package.json @@ -8,8 +8,8 @@ "dependencies": { "@ai-sdk/amazon-bedrock": "^2.0.0", "@aws-sdk/credential-providers": "^3.0.0", - "@unchartedfr/zapcode": "latest", - "@unchartedfr/zapcode-ai": "latest", + "@unchartedfr/zapcode": "file:../../../crates/zapcode-js", + "@unchartedfr/zapcode-ai": "file:../../../packages/zapcode-ai", "ai": "^4.0.0" }, "devDependencies": { diff --git a/examples/typescript/basic/README.md b/examples/typescript/basic/README.md new file mode 100644 index 0000000..c288195 --- /dev/null +++ b/examples/typescript/basic/README.md @@ -0,0 +1,15 @@ +# Basic TypeScript Example + +Simple expressions, inputs, data processing, snapshot/resume, classes, and resource limits. + +## Setup + +```bash +npm install +``` + +## Run + +```bash +npm start +``` diff --git a/examples/typescript/basic.ts b/examples/typescript/basic/main.ts similarity index 96% rename from examples/typescript/basic.ts rename to examples/typescript/basic/main.ts index 5f3aecc..6637977 100644 --- a/examples/typescript/basic.ts +++ b/examples/typescript/basic/main.ts @@ -1,8 +1,8 @@ /** * Basic Zapcode example — execute TypeScript from Node.js. 
* - * Prerequisites: build zapcode-js (see README) - * Run with: npx ts-node examples/typescript/basic.ts + * Prerequisites: npm install + * Run with: npx tsx main.ts */ import { Zapcode, ZapcodeSnapshotHandle } from "@unchartedfr/zapcode"; diff --git a/examples/typescript/basic/package.json b/examples/typescript/basic/package.json new file mode 100644 index 0000000..0a4ad4c --- /dev/null +++ b/examples/typescript/basic/package.json @@ -0,0 +1,15 @@ +{ + "name": "zapcode-basic-example", + "private": true, + "type": "module", + "scripts": { + "start": "npx tsx main.ts" + }, + "dependencies": { + "@unchartedfr/zapcode": "file:../../../crates/zapcode-js" + }, + "devDependencies": { + "tsx": "^4.0.0", + "typescript": "^5.0.0" + } +} diff --git a/examples/typescript/debug-tracing/README.md b/examples/typescript/debug-tracing/README.md new file mode 100644 index 0000000..7982779 --- /dev/null +++ b/examples/typescript/debug-tracing/README.md @@ -0,0 +1,52 @@ +# Debug & Tracing Example + +Demonstrates Zapcode's debug mode, auto-fix error recovery, and execution tracing. + +## Features + +- **`debug: true`** — Prints the LLM-generated code, external tool calls, and output for each execution +- **`autoFix: true`** — When the LLM generates code that fails, the error is returned as a tool result instead of throwing, letting the LLM self-correct on the next step +- **`printTrace()`** — Displays the full execution trace tree (parse -> compile -> execute) with timing + +## Setup + +```bash +npm install +``` + +## Run + +```bash +# Default model (Amazon Nova) +npm start + +# With a specific model +MODEL_ID=anthropic.claude-sonnet-4-20250514 npm start +``` + +## Example output + +``` +Model: global.amazon.nova-2-lite-v1:0 | Region: eu-west-1 +Debug: ON | AutoFix: ON + +[zapcode] Code: + const tokyo = await getWeather("Tokyo"); + const paris = await getWeather("Paris"); + const colder = tokyo.temp < paris.temp ? "Tokyo" : "Paris"; + const warmer = tokyo.temp < paris.temp ? 
"Paris" : "Tokyo"; + const flights = await searchFlights(colder, warmer); + flights; + +[zapcode] Tool call: getWeather("Tokyo") -> {"condition":"Clear","temp":26} +[zapcode] Tool call: getWeather("Paris") -> {"condition":"Sunny","temp":22} +[zapcode] Tool call: searchFlights("Paris", "Tokyo") -> [...] +[zapcode] Output: [{"from":"Paris","to":"Tokyo",...}] + +--- Execution Trace --- +session [zapcode.tools: getWeather, searchFlights] 12.4ms + attempt_1 8.2ms + parse 0.1ms + compile 0.0ms + execute 8.1ms +``` diff --git a/examples/typescript/debug-tracing/main.ts b/examples/typescript/debug-tracing/main.ts new file mode 100644 index 0000000..3f1761b --- /dev/null +++ b/examples/typescript/debug-tracing/main.ts @@ -0,0 +1,139 @@ +/** + * Zapcode debug & tracing example. + * + * Demonstrates: + * - Logging LLM-generated code, tool calls, and output + * - `autoFix: true` — catches execution errors and feeds them back to the LLM + * - `printTrace()` — displays the execution trace tree with timing + * + * Prerequisites: + * npm install + * AWS credentials configured (env vars, ~/.aws/credentials, or IAM role) + * + * Run: npm start + */ + +import { zapcode, type ExecutionResult } from "@unchartedfr/zapcode-ai"; +import { generateText } from "ai"; +import { createAmazonBedrock } from "@ai-sdk/amazon-bedrock"; +import { fromNodeProviderChain } from "@aws-sdk/credential-providers"; + +// --- Bedrock setup --- +const REGION = process.env.AWS_REGION ?? "eu-west-1"; + +const bedrock = createAmazonBedrock({ + credentialProvider: fromNodeProviderChain(), + region: REGION, +}); + +const MODEL_ID = process.env.MODEL_ID ?? "global.amazon.nova-2-lite-v1:0"; +const model = bedrock(MODEL_ID); + +// --- Zapcode setup with autoFix --- +const { system, tools, printTrace } = zapcode({ + autoFix: true, + system: "You are a helpful assistant that can look up weather and do math.", + tools: { + getWeather: { + description: + "Get current weather for a city. 
Returns { condition: string, temp: number }", + parameters: { + city: { type: "string", description: "City name" }, + }, + execute: async ({ city }) => { + const data: Record = { + London: { condition: "Overcast", temp: 12 }, + Tokyo: { condition: "Clear", temp: 26 }, + Paris: { condition: "Sunny", temp: 22 }, + "New York": { condition: "Rain", temp: 14 }, + }; + return data[city as string] ?? { condition: "Unknown", temp: 0 }; + }, + }, + searchFlights: { + description: + "Search flights between two cities. Returns Array<{ from, to, airline, flight, price, departure }>", + parameters: { + from: { type: "string", description: "Departure city" }, + to: { type: "string", description: "Arrival city" }, + }, + execute: async ({ from, to }) => { + return [ + { from, to, airline: "BA", flight: "BA123", price: 450, departure: "08:00" }, + { from, to, airline: "AF", flight: "AF456", price: 380, departure: "14:30" }, + ]; + }, + }, + }, +}); + +// --- Debug: log each step's generated code, tool calls, and output --- +function logExecution(result: ExecutionResult) { + // Print the generated code + const indented = result.code.split("\n").map((l) => " " + l).join("\n"); + console.log(`\n[zapcode] Code:\n${indented}`); + + // Print each tool call + for (const tc of result.toolCalls) { + const argsStr = (tc.args as unknown[]).map((a) => JSON.stringify(a)).join(", "); + console.log(`[zapcode] Tool call: ${tc.name}(${argsStr}) → ${JSON.stringify(tc.result)}`); + } + + // Print output or error + if (result.error) { + console.log(`[zapcode] Error: ${result.error}`); + } else { + console.log(`[zapcode] Output: ${JSON.stringify(result.output)}`); + } +} + +// --- Run --- +async function main() { + console.log(`Model: ${MODEL_ID} | Region: ${REGION}`); + console.log(`Debug: ON | AutoFix: ON`); + + const t0 = performance.now(); + + const result = await generateText({ + model, + system, + tools, + maxSteps: 10, + messages: [ + { + role: "user", + content: + "What's the weather in Tokyo 
and Paris? Find flights from the colder city to the warmer one.", + }, + ], + onStepFinish: (step) => { + // Log every execute_code tool call result + for (const toolResult of step.toolResults) { + if (toolResult.toolName === "execute_code") { + logExecution(toolResult.result as ExecutionResult); + } + } + }, + }); + + const totalMs = (performance.now() - t0).toFixed(0); + + console.log("\nAnswer:", result.text); + console.log(`\n--- Timing ---`); + console.log(`Total (LLM + Zapcode): ${totalMs}ms`); + console.log(`Steps: ${result.steps.length}`); + const toolCallCount = result.steps.reduce( + (count, step) => count + step.toolCalls.length, + 0, + ); + console.log(`Tool calls: ${toolCallCount}`); + console.log( + `Usage: ${result.usage.promptTokens} prompt + ${result.usage.completionTokens} completion = ${result.usage.totalTokens} tokens`, + ); + + // Print the full execution trace tree + console.log(`\n--- Execution Trace ---`); + printTrace(); +} + +main().catch(console.error); diff --git a/examples/typescript/debug-tracing/package.json b/examples/typescript/debug-tracing/package.json new file mode 100644 index 0000000..db4ba76 --- /dev/null +++ b/examples/typescript/debug-tracing/package.json @@ -0,0 +1,19 @@ +{ + "name": "zapcode-debug-tracing-example", + "private": true, + "type": "module", + "scripts": { + "start": "npx tsx main.ts" + }, + "dependencies": { + "@ai-sdk/amazon-bedrock": "^2.0.0", + "@aws-sdk/credential-providers": "^3.0.0", + "@unchartedfr/zapcode": "file:../../../crates/zapcode-js", + "@unchartedfr/zapcode-ai": "file:../../../packages/zapcode-ai", + "ai": "^4.0.0" + }, + "devDependencies": { + "tsx": "^4.0.0", + "typescript": "^5.0.0" + } +} diff --git a/examples/typescript/package.json b/examples/typescript/package.json deleted file mode 100644 index 885739e..0000000 --- a/examples/typescript/package.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "name": "zapcode-examples", - "version": "0.0.1", - "private": true, - "type": "module", - 
"description": "Example usage of Zapcode with AI agents", - "scripts": { - "basic": "tsx basic.ts", - "agent": "tsx ai-agent-zapcode-ai.ts", - "agent:anthropic": "tsx ai-agent-anthropic.ts", - "agent:vercel": "tsx ai-agent-vercel-ai.ts" - }, - "dependencies": { - "@unchartedfr/zapcode": "latest", - "@unchartedfr/zapcode-ai": "latest" - }, - "devDependencies": { - "@types/node": "^22.0.0", - "tsx": "^4.19.0", - "typescript": "^5.7.0" - }, - "optionalDependencies": { - "@ai-sdk/amazon-bedrock": "^2.0.0", - "@ai-sdk/anthropic": "^1.1.0", - "@ai-sdk/google": "^1.1.0", - "@ai-sdk/openai": "^1.1.0", - "@anthropic-ai/sdk": "^0.39.0", - "ai": "^4.1.0" - } -} diff --git a/examples/typescript/tsconfig.json b/examples/typescript/tsconfig.json deleted file mode 100644 index 18993d9..0000000 --- a/examples/typescript/tsconfig.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "compilerOptions": { - "target": "ES2022", - "module": "ESNext", - "moduleResolution": "bundler", - "strict": true, - "esModuleInterop": true, - "skipLibCheck": true - }, - "include": ["*.ts"] -} diff --git a/examples/wasm/index.html b/examples/wasm/basic/index.html similarity index 100% rename from examples/wasm/index.html rename to examples/wasm/basic/index.html From eb94d59d2a1874a2ac7d185d15bd9fece93a6474 Mon Sep 17 00:00:00 2001 From: Alexandre MAI Date: Thu, 12 Mar 2026 18:41:37 +0100 Subject: [PATCH 06/10] docs: document autoFix, execution trace, and debug logging in README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These features were implemented but undiscoverable — a user looking at the README had no idea they could enable error recovery or inspect execution timing. Adds a dedicated section explaining the why (LLM self-correction, production observability) and links to the debug-tracing examples for step-by-step logging patterns. Also updates example paths to reflect the new directory structure. 
--- CONTRIBUTING.md | 4 +-- README.md | 69 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dabc550..af4dd4e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -60,8 +60,8 @@ Key rules: - Write tests before considering a feature done - Core tests: `cargo test -p zapcode-core` - Security tests: `cargo test -p zapcode-core --test security` -- E2E JS: build bindings then run `examples/typescript/basic.ts` -- E2E Python: build bindings then run `examples/python/basic.py` +- E2E JS: build bindings then run `examples/typescript/basic/main.ts` +- E2E Python: build bindings then run `examples/python/basic/main.py` ## Reporting issues diff --git a/README.md b/README.md index 12c157e..fb88300 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ if (!state.completed) { } ``` -See [`examples/typescript/basic.ts`](examples/typescript/basic.ts) for more. +See [`examples/typescript/basic/main.ts`](examples/typescript/basic/main.ts) for more. ### Python @@ -187,7 +187,7 @@ if state.get("suspended"): result = restored.resume({"condition": "Clear", "temp": 26}) ``` -See [`examples/python/basic.py`](examples/python/basic.py) for more. +See [`examples/python/basic/main.py`](examples/python/basic/main.py) for more.
Rust @@ -225,7 +225,7 @@ if let VmState::Suspended { snapshot, .. } = state { } ``` -See [`examples/rust/basic.rs`](examples/rust/basic.rs) for more. +See [`examples/rust/basic/basic.rs`](examples/rust/basic/basic.rs) for more.
@@ -246,7 +246,7 @@ console.log(result.output); // 120 ``` -See [`examples/wasm/index.html`](examples/wasm/index.html) for a full playground. +See [`examples/wasm/basic/index.html`](examples/wasm/basic/index.html) for a full playground.
## AI Agent Usage @@ -300,7 +300,7 @@ const { text } = await generateText({ Under the hood: the LLM writes TypeScript code that calls your tools → Zapcode executes it in a sandbox → tool calls suspend the VM → your `execute` functions run on the host → results flow back in. All in ~2µs startup + tool execution time. -See [`examples/typescript/ai-agent-zapcode-ai.ts`](examples/typescript/ai-agent-zapcode-ai.ts) for the full working example. +See [`examples/typescript/ai-agent/ai-agent-zapcode-ai.ts`](examples/typescript/ai-agent/ai-agent-zapcode-ai.ts) for the full working example.
Anthropic SDK @@ -365,7 +365,7 @@ while state.get("suspended"): print(state["output"]) ``` -See [`examples/typescript/ai-agent-anthropic.ts`](examples/typescript/ai-agent-anthropic.ts) and [`examples/python/ai_agent_anthropic.py`](examples/python/ai_agent_anthropic.py). +See [`examples/typescript/ai-agent/ai-agent-anthropic.ts`](examples/typescript/ai-agent/ai-agent-anthropic.ts) and [`examples/python/ai-agent/ai_agent_anthropic.py`](examples/python/ai-agent/ai_agent_anthropic.py).
@@ -452,6 +452,63 @@ langchain_tool = b.custom["langchain"] The adapter receives an `AdapterContext` with everything needed: system prompt, tool name, tool JSON schema, and a `handleToolCall` function. Return whatever shape your SDK expects.
+## Auto-Fix, Debug & Execution Tracing + +### Auto-fix (`autoFix`) + +When enabled, execution errors are returned as tool results instead of throwing — letting the LLM see the error and self-correct on the next step. + +**TypeScript:** +```typescript +const { system, tools } = zapcode({ + autoFix: true, + tools: { /* ... */ }, +}); +``` + +**Python:** +```python +zap = zapcode(auto_fix=True, tools={...}) +``` + +### Execution Trace + +Every execution produces a trace tree with timing for each phase (parse → compile → execute). Use `printTrace()` / `print_trace()` to display the full session trace, or `getTrace()` / `get_trace()` to access the trace programmatically. + +**TypeScript:** +```typescript +const { system, tools, printTrace, getTrace } = zapcode({ + autoFix: true, + tools: { /* ... */ }, +}); + +// After running... +printTrace(); +// ✓ zapcode.session 12.3ms +// ✓ execute_code 8.1ms +// ✓ parse 0.2ms +// ✓ compile 0.1ms +// ✓ execute 7.8ms + +const trace = getTrace(); // TraceSpan tree +``` + +**Python:** +```python +zap = zapcode(auto_fix=True, tools={...}) + +# After running... 
+zap.print_trace() +trace = zap.get_trace() # TraceSpan tree +``` + +### Debug Logging + +For detailed logging of generated code, tool calls, and output, see the debug-tracing examples which show how to inspect each execution step: + +- [TypeScript debug-tracing example](examples/typescript/debug-tracing/main.ts) +- [Python debug-tracing example](examples/python/debug-tracing/main.py) + ## What Zapcode Can and Cannot Do **Can do:** From 312c7b678022954a106a32a38424746a532618e7 Mon Sep 17 00:00:00 2001 From: Alexandre MAI Date: Thu, 12 Mar 2026 18:41:40 +0100 Subject: [PATCH 07/10] chore: update Cargo.lock --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8de140a..e6c734e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1544,7 +1544,7 @@ dependencies = [ [[package]] name = "zapcode-core" -version = "1.0.0" +version = "1.0.1" dependencies = [ "divan", "indexmap", @@ -1561,7 +1561,7 @@ dependencies = [ [[package]] name = "zapcode-js" -version = "1.0.0" +version = "1.0.1" dependencies = [ "napi", "napi-build", @@ -1572,7 +1572,7 @@ dependencies = [ [[package]] name = "zapcode-py" -version = "1.0.0" +version = "1.0.1" dependencies = [ "indexmap", "pyo3", @@ -1581,7 +1581,7 @@ dependencies = [ [[package]] name = "zapcode-wasm" -version = "1.0.0" +version = "1.0.1" dependencies = [ "indexmap", "js-sys", From 01e29a8dc2b3e30ad53424290b5003dbf6d12de5 Mon Sep 17 00:00:00 2001 From: Alexandre MAI Date: Thu, 12 Mar 2026 18:46:08 +0100 Subject: [PATCH 08/10] style: fix cargo fmt formatting in trace code --- crates/zapcode-core/src/vm/mod.rs | 6 +++++- crates/zapcode-core/tests/trace.rs | 30 ++++++++++++++++++++++-------- crates/zapcode-js/src/lib.rs | 5 ++++- crates/zapcode-wasm/src/lib.rs | 24 ++++++++++++++++++------ 4 files changed, 49 insertions(+), 16 deletions(-) diff --git a/crates/zapcode-core/src/vm/mod.rs b/crates/zapcode-core/src/vm/mod.rs index cc1ee60..93a516e 100644 --- 
a/crates/zapcode-core/src/vm/mod.rs +++ b/crates/zapcode-core/src/vm/mod.rs @@ -2249,7 +2249,11 @@ impl ZapcodeRun { Ok(s) => { let status = match &s { VmState::Complete(_) => TraceStatus::Ok, - VmState::Suspended { function_name, args, .. } => { + VmState::Suspended { + function_name, + args, + .. + } => { let mut span = execute_span; span.set_attr("zapcode.suspended_on", function_name); span.set_attr("zapcode.args_count", args.len()); diff --git a/crates/zapcode-core/tests/trace.rs b/crates/zapcode-core/tests/trace.rs index 8a9dc4f..2875e5b 100644 --- a/crates/zapcode-core/tests/trace.rs +++ b/crates/zapcode-core/tests/trace.rs @@ -1,6 +1,4 @@ -use zapcode_core::{ - ResourceLimits, TraceSpan, TraceStatus, Value, VmState, ZapcodeRun, -}; +use zapcode_core::{ResourceLimits, TraceSpan, TraceStatus, Value, VmState, ZapcodeRun}; // --------------------------------------------------------------------------- // Helpers @@ -57,7 +55,12 @@ fn trace_all_children_have_ok_status_on_success() { assert_eq!(root.status, TraceStatus::Ok); for child in &root.children { - assert_eq!(child.status, TraceStatus::Ok, "child '{}' should be Ok", child.name); + assert_eq!( + child.status, + TraceStatus::Ok, + "child '{}' should be Ok", + child.name + ); } } @@ -96,8 +99,13 @@ fn trace_root_duration_gte_children_sum() { #[test] fn trace_parse_error_has_error_status() { - let runner = - ZapcodeRun::new("{{{{".to_string(), vec![], vec![], ResourceLimits::default()).unwrap(); + let runner = ZapcodeRun::new( + "{{{{".to_string(), + vec![], + vec![], + ResourceLimits::default(), + ) + .unwrap(); let err = runner.run(vec![]); // Parse errors return Err, so we can't inspect the trace from RunResult. 
@@ -139,7 +147,10 @@ fn trace_on_suspension_has_execute_with_suspended_attrs() { .attributes .iter() .find(|(k, _)| k == "zapcode.suspended_on"); - assert!(suspended_attr.is_some(), "execute span should have zapcode.suspended_on attribute"); + assert!( + suspended_attr.is_some(), + "execute span should have zapcode.suspended_on attribute" + ); assert_eq!(suspended_attr.unwrap().1, "fetchData"); } @@ -174,7 +185,10 @@ fn trace_pretty_print_contains_span_names() { let result = run_code("1 + 1"); let output = result.trace.to_string_pretty(); - assert!(output.contains("zapcode.run"), "should contain root span name"); + assert!( + output.contains("zapcode.run"), + "should contain root span name" + ); assert!(output.contains("parse"), "should contain parse span"); assert!(output.contains("compile"), "should contain compile span"); assert!(output.contains("execute"), "should contain execute span"); diff --git a/crates/zapcode-js/src/lib.rs b/crates/zapcode-js/src/lib.rs index c7f0e1e..131fc33 100644 --- a/crates/zapcode-js/src/lib.rs +++ b/crates/zapcode-js/src/lib.rs @@ -4,7 +4,10 @@ use std::sync::Arc; use napi::bindgen_prelude::*; use napi_derive::napi; -use zapcode_core::{ExecutionTrace, ResourceLimits, TraceSpan, TraceStatus, Value, VmState, ZapcodeRun, ZapcodeSnapshot}; +use zapcode_core::{ + ExecutionTrace, ResourceLimits, TraceSpan, TraceStatus, Value, VmState, ZapcodeRun, + ZapcodeSnapshot, +}; // --------------------------------------------------------------------------- // Options diff --git a/crates/zapcode-wasm/src/lib.rs b/crates/zapcode-wasm/src/lib.rs index 71e4441..552b1e6 100644 --- a/crates/zapcode-wasm/src/lib.rs +++ b/crates/zapcode-wasm/src/lib.rs @@ -212,12 +212,24 @@ fn trace_span_to_js(span: &CoreTraceSpan) -> Result { let obj = Object::new(); Reflect::set(&obj, &"name".into(), &JsValue::from_str(&span.name)) .map_err(|_| JsError::new("failed to set trace field"))?; - Reflect::set(&obj, &"startTimeMs".into(), &JsValue::from(span.start_time_ms as 
f64)) - .map_err(|_| JsError::new("failed to set trace field"))?; - Reflect::set(&obj, &"endTimeMs".into(), &JsValue::from(span.end_time_ms as f64)) - .map_err(|_| JsError::new("failed to set trace field"))?; - Reflect::set(&obj, &"durationUs".into(), &JsValue::from(span.duration_us as f64)) - .map_err(|_| JsError::new("failed to set trace field"))?; + Reflect::set( + &obj, + &"startTimeMs".into(), + &JsValue::from(span.start_time_ms as f64), + ) + .map_err(|_| JsError::new("failed to set trace field"))?; + Reflect::set( + &obj, + &"endTimeMs".into(), + &JsValue::from(span.end_time_ms as f64), + ) + .map_err(|_| JsError::new("failed to set trace field"))?; + Reflect::set( + &obj, + &"durationUs".into(), + &JsValue::from(span.duration_us as f64), + ) + .map_err(|_| JsError::new("failed to set trace field"))?; Reflect::set( &obj, &"status".into(), From ccfc2ee8ee05e05d628b10211635a834a350f79d Mon Sep 17 00:00:00 2001 From: Alexandre MAI Date: Thu, 12 Mar 2026 18:52:27 +0100 Subject: [PATCH 09/10] fix: update CI paths after examples directory reorganization CI was referencing old flat paths (examples/typescript/package.json, examples/python/basic.py) which no longer exist after the move to language-first, topic-second structure. 
--- .github/workflows/ci.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f4e0e38..6728bd1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,17 +63,17 @@ jobs: uses: actions/cache@v4 with: path: ~/.npm - key: npm-${{ runner.os }}-${{ hashFiles('crates/zapcode-js/package.json', 'examples/typescript/package.json') }} + key: npm-${{ runner.os }}-${{ hashFiles('crates/zapcode-js/package.json', 'examples/typescript/basic/package.json') }} - name: Build JS bindings working-directory: crates/zapcode-js run: | npm install npx napi build --release --platform --js index.js --dts index.d.ts - name: Run basic example - working-directory: examples/typescript + working-directory: examples/typescript/basic run: | npm install - npx tsx basic.ts + npx tsx main.ts # ── Python bindings — build + e2e ─────────────────────────────────── e2e-python: @@ -101,10 +101,10 @@ jobs: source ${{ github.workspace }}/.venv/bin/activate maturin develop --release - name: Run basic example - working-directory: examples/python + working-directory: examples/python/basic run: | source ${{ github.workspace }}/.venv/bin/activate - python basic.py + python main.py # ── WASM — build + smoke test ─────────────────────────────────────── e2e-wasm: From cdbc6eeed8357a3b4fbc4066d584e003c42ff143 Mon Sep 17 00:00:00 2001 From: Alexandre MAI Date: Thu, 12 Mar 2026 19:11:16 +0100 Subject: [PATCH 10/10] fix: address CodeRabbit review findings - Fix debug flag not passed through in zapcode-ai TS (was hardcoded to false, so debug:true had no effect on per-attempt traces) - Remove unnecessary f-strings (Ruff F541) in Python files - Add language tags to fenced code blocks (markdownlint MD040) - Fix cd paths in examples/README.md to work from repo root - Remove unnecessary npx prefix in npm scripts (tsx is a devDep) - Make CONTRIBUTING.md E2E commands copy-pasteable - Add AWS credentials prerequisites to 
Bedrock/debug-tracing READMEs - Clarify working directory for Rust example --- CONTRIBUTING.md | 4 ++-- examples/README.md | 12 ++++++------ examples/python/ai-bedrock/README.md | 4 ++++ examples/python/debug-tracing/README.md | 4 ++++ examples/python/debug-tracing/main.py | 2 +- examples/rust/basic/README.md | 1 + examples/typescript/ai-agent/package.json | 6 +++--- examples/typescript/ai-bedrock/README.md | 4 ++++ examples/typescript/debug-tracing/README.md | 6 +++++- .../zapcode-ai-python/src/zapcode_ai/__init__.py | 2 +- packages/zapcode-ai/src/index.ts | 2 +- 11 files changed, 32 insertions(+), 15 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index af4dd4e..a729a0b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -60,8 +60,8 @@ Key rules: - Write tests before considering a feature done - Core tests: `cargo test -p zapcode-core` - Security tests: `cargo test -p zapcode-core --test security` -- E2E JS: build bindings then run `examples/typescript/basic/main.ts` -- E2E Python: build bindings then run `examples/python/basic/main.py` +- E2E JS: `cd crates/zapcode-js && npm install && npx napi build --release --platform --js index.js --dts index.d.ts && cd ../../examples/typescript/basic && npm install && npx tsx main.ts` +- E2E Python: `cd crates/zapcode-py && maturin develop --release && cd ../../examples/python/basic && python main.py` ## Reporting issues diff --git a/examples/README.md b/examples/README.md index fe7374a..8d1fe03 100644 --- a/examples/README.md +++ b/examples/README.md @@ -2,7 +2,7 @@ Examples organized by language, then by topic. -``` +```text examples/ ├── typescript/ │ ├── basic/ Simple expressions, inputs, snapshot/resume, classes @@ -26,14 +26,14 @@ Each example has its own `README.md` with setup and run instructions. 
Pick a lan ```bash # TypeScript — basic usage (no API key needed) -cd typescript/basic && npm install && npm start +cd examples/typescript/basic && npm install && npm start # Python — basic usage (no API key needed) -cd python/basic && pip install zapcode && python main.py +cd examples/python/basic && pip install zapcode && python main.py # Rust — basic usage -cd rust/basic && cargo run --example basic +cd examples/rust/basic && cargo run --example basic -# WASM — open in browser -open wasm/basic/index.html +# WASM — open in browser (macOS: open, Linux: xdg-open, Windows: start) +xdg-open examples/wasm/basic/index.html ``` diff --git a/examples/python/ai-bedrock/README.md b/examples/python/ai-bedrock/README.md index 82a5835..7d11aaf 100644 --- a/examples/python/ai-bedrock/README.md +++ b/examples/python/ai-bedrock/README.md @@ -2,6 +2,10 @@ Zapcode + AWS Bedrock Converse API. +## Prerequisites + +AWS credentials must be configured (env vars, `~/.aws/credentials`, or IAM role) with access to the Bedrock model specified by `MODEL_ID` in your target `AWS_REGION`. + ## Setup ```bash diff --git a/examples/python/debug-tracing/README.md b/examples/python/debug-tracing/README.md index 6766d04..1b0b5a5 100644 --- a/examples/python/debug-tracing/README.md +++ b/examples/python/debug-tracing/README.md @@ -8,6 +8,10 @@ Demonstrates Zapcode's debug mode, auto-fix error recovery, and execution tracin - **`auto_fix=True`** — When the LLM generates code that fails, the error is returned as a tool result instead of raising, letting the LLM self-correct on the next step - **`print_trace()`** — Displays the full execution trace tree (parse -> compile -> execute) with timing +## Prerequisites + +AWS credentials must be configured (env vars, `~/.aws/credentials`, or IAM role) with access to the Bedrock model specified by `MODEL_ID` in your target `AWS_REGION`. 
+ ## Setup ```bash diff --git a/examples/python/debug-tracing/main.py b/examples/python/debug-tracing/main.py index 0c48b99..cf7e8fa 100644 --- a/examples/python/debug-tracing/main.py +++ b/examples/python/debug-tracing/main.py @@ -90,7 +90,7 @@ def log_execution(result): def main(): print(f"Model: {MODEL_ID} | Region: {REGION}") - print(f"Debug: ON | AutoFix: ON") + print("Debug: ON | AutoFix: ON") t0 = time.perf_counter() diff --git a/examples/rust/basic/README.md b/examples/rust/basic/README.md index 25ad15c..52c5486 100644 --- a/examples/rust/basic/README.md +++ b/examples/rust/basic/README.md @@ -5,5 +5,6 @@ Simple expressions, inputs, external functions (snapshot/resume), and snapshot s ## Run ```bash +# From this directory (examples/rust/basic/) cargo run --example basic ``` diff --git a/examples/typescript/ai-agent/package.json b/examples/typescript/ai-agent/package.json index 9d9fc01..da3afcd 100644 --- a/examples/typescript/ai-agent/package.json +++ b/examples/typescript/ai-agent/package.json @@ -3,9 +3,9 @@ "private": true, "type": "module", "scripts": { - "agent": "npx tsx ai-agent-zapcode-ai.ts", - "agent:anthropic": "npx tsx ai-agent-anthropic.ts", - "agent:vercel": "npx tsx ai-agent-vercel-ai.ts" + "agent": "tsx ai-agent-zapcode-ai.ts", + "agent:anthropic": "tsx ai-agent-anthropic.ts", + "agent:vercel": "tsx ai-agent-vercel-ai.ts" }, "dependencies": { "@unchartedfr/zapcode": "file:../../../crates/zapcode-js", diff --git a/examples/typescript/ai-bedrock/README.md b/examples/typescript/ai-bedrock/README.md index 1cee546..c42df4b 100644 --- a/examples/typescript/ai-bedrock/README.md +++ b/examples/typescript/ai-bedrock/README.md @@ -2,6 +2,10 @@ Zapcode + Vercel AI SDK + AWS Bedrock. +## Prerequisites + +AWS credentials must be configured (env vars, `~/.aws/credentials`, or IAM role) with access to the Bedrock model specified by `MODEL_ID` in your target `AWS_REGION`. 
+ ## Setup ```bash diff --git a/examples/typescript/debug-tracing/README.md b/examples/typescript/debug-tracing/README.md index 7982779..afcc71e 100644 --- a/examples/typescript/debug-tracing/README.md +++ b/examples/typescript/debug-tracing/README.md @@ -8,6 +8,10 @@ Demonstrates Zapcode's debug mode, auto-fix error recovery, and execution tracin - **`autoFix: true`** — When the LLM generates code that fails, the error is returned as a tool result instead of throwing, letting the LLM self-correct on the next step - **`printTrace()`** — Displays the full execution trace tree (parse -> compile -> execute) with timing +## Prerequisites + +AWS credentials must be configured (env vars, `~/.aws/credentials`, or IAM role) with access to the Bedrock model specified by `MODEL_ID` in your target `AWS_REGION`. + ## Setup ```bash @@ -26,7 +30,7 @@ MODEL_ID=anthropic.claude-sonnet-4-20250514 npm start ## Example output -``` +```text Model: global.amazon.nova-2-lite-v1:0 | Region: eu-west-1 Debug: ON | AutoFix: ON diff --git a/packages/zapcode-ai-python/src/zapcode_ai/__init__.py b/packages/zapcode-ai-python/src/zapcode_ai/__init__.py index f8a95fc..53f7fec 100644 --- a/packages/zapcode-ai-python/src/zapcode_ai/__init__.py +++ b/packages/zapcode-ai-python/src/zapcode_ai/__init__.py @@ -182,7 +182,7 @@ def _end_span(span: TraceSpan, status: str | None = None) -> TraceSpan: def _print_trace(span: TraceSpan, indent: int = 0) -> None: prefix = "" if indent == 0 else "│ " * (indent - 1) + "├─ " icon = "✗" if span.status == "error" else "✓" - duration = f"<1ms" if span.duration_ms < 1 else f"{span.duration_ms:.0f}ms" + duration = "<1ms" if span.duration_ms < 1 else f"{span.duration_ms:.0f}ms" attrs = " ".join( f"{k}={str(v)[:80]}" for k, v in span.attributes.items() if not k.startswith("zapcode.code") # don't dump full code in trace diff --git a/packages/zapcode-ai/src/index.ts b/packages/zapcode-ai/src/index.ts index 0d36b9e..5a7c153 100644 --- a/packages/zapcode-ai/src/index.ts +++ 
b/packages/zapcode-ai/src/index.ts @@ -456,7 +456,7 @@ export function zapcode(options: ZapcodeAIOptions): ZapcodeAIResult { const system = buildSystemPrompt(toolDefs, userSystem); - const execOptions = { memoryLimitMb, timeLimitMs, debug: false, autoFix }; + const execOptions = { memoryLimitMb, timeLimitMs, debug, autoFix }; const tracing = debug || autoFix; // Session-level trace collects all attempts