From 61ee44ae7cc559ba27636a6fc74a694a404f7c6d Mon Sep 17 00:00:00 2001
From: Joaquin Bejar
Date: Sat, 25 Apr 2026 13:24:51 +0200
Subject: [PATCH] feat(utils): add CountingAllocator behind alloc-counters
 feature

Wraps any inner GlobalAlloc and tracks four AtomicU64 counters
(allocs / deallocs / bytes_allocated / bytes_deallocated). Bench / test
binaries opt in via:

    use orderbook_rs::CountingAllocator;
    use std::alloc::System;

    #[global_allocator]
    static A: CountingAllocator = CountingAllocator::new(System);

The library rlib does not install a global allocator. The wrapper
exists so bench / budget-test binaries can measure hot-path allocation
without forcing a global choice.

New bench at benches/order_book/alloc_count.rs reports allocs_per_op +
bytes_alloc/op for the mixed 70/20/10 workload (200k warmup + 1M
measured) and writes a markdown summary to target/alloc-counters/.

New integration test at tests/alloc_budget.rs (its own [[test]] binary,
gated on alloc-counters) asserts allocs/op < 10 over 10 000 mixed ops
as a CI regression guard.

mod utils is now pub mod utils so the new types are reachable via the
canonical orderbook_rs::utils path. counting_allocator carries a
documented #[allow(unsafe_code)] exception confined to the GlobalAlloc
trait boundary. BENCH.md gains an Allocation profile section.
CHANGELOG and lib.rs updated.

Closes #58.
---
 BENCH.md                                 |  45 ++++++
 CHANGELOG.md                             |  36 +++++
 Cargo.toml                               |  12 ++
 README.md                                |  16 ++
 benches/order_book/alloc_count.rs        | 135 ++++++++++++++++
 benches/order_book/mixed_70_20_10_hdr.rs |  11 +-
 src/lib.rs                               |  20 ++-
 src/utils/counting_allocator.rs          | 186 +++++++++++++++++++++++
 src/utils/mod.rs                         |   6 +
 tests/alloc_budget.rs                    | 100 ++++++++++++
 tests/unit/replay_determinism.rs         |  10 +-
 11 files changed, 570 insertions(+), 7 deletions(-)
 create mode 100644 benches/order_book/alloc_count.rs
 create mode 100644 src/utils/counting_allocator.rs
 create mode 100644 tests/alloc_budget.rs

diff --git a/BENCH.md b/BENCH.md
index bc88217..96264f6 100644
--- a/BENCH.md
+++ b/BENCH.md
@@ -8,6 +8,51 @@ that Criterion does well.
 The HDR benches are the source of truth for the **tail** numbers
 (`p50` / `p99` / `p99.9` / `p99.99`) that tier-one electronic
 exchanges quote in SLOs.
 
+## Allocation profile (feature `alloc-counters`)
+
+Under the `alloc-counters` feature the crate exposes a
+`CountingAllocator` wrapper that tracks
+`allocs` / `deallocs` / `bytes_allocated` / `bytes_deallocated` as
+`AtomicU64` counters. Bench / test binaries opt in via:
+
+```rust
+use orderbook_rs::CountingAllocator;
+use std::alloc::System;
+
+#[global_allocator]
+static A: CountingAllocator = CountingAllocator::new(System);
+```
+
+`benches/order_book/alloc_count.rs` runs the same mixed 70 / 20 / 10
+workload as `mixed_70_20_10_hdr` but reports `allocs_per_op` and
+`bytes_alloc/op` over the measurement window (200 000 warmup +
+1 000 000 measured). A reference run on the same M4 Max host:
+
+| counter        | value         |
+|----------------|---------------|
+| allocs         | 17 757 222    |
+| deallocs       | 17 690 635    |
+| bytes_alloc    | 4 926 064 834 |
+| bytes_dealloc  | 4 897 062 482 |
+| **allocs/op**  | **17.76**     |
+| bytes_alloc/op | 4 926         |
+
+This is the headline number for "what does the matching engine cost
+in alloc pressure on a realistic workload" — far more useful as a
+regression signal than as an absolute target. The integration test
+`tests/alloc_budget.rs` runs a smaller 10 000-op slice and asserts
+`allocs/op < 10` to catch order-of-magnitude regressions in CI.
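+
+To budget your own slice of the hot path, diff two snapshots around
+the measured region. A minimal sketch, assuming `A` is the wrapper
+installed above; `run_my_workload()` and `OPS` are placeholders for
+your own measured loop:
+
+```rust
+// Counters are process-global and only ever increase, so a
+// before/after pair isolates exactly the measured region.
+let before = A.snapshot();
+run_my_workload(); // OPS operations
+let delta = A.snapshot().since(before);
+println!("allocs/op = {:.4}", delta.allocs as f64 / OPS as f64);
+```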
+
+Run yourself:
+
+```bash
+cargo bench --features alloc-counters --bench alloc_count
+cargo test --features alloc-counters alloc_budget
+```
+
+Per-run summaries land in `target/alloc-counters/<scenario>.md`.
+
 ## How to run
 
 ```bash
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0313335..6e39966 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,42 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 > below group changes by feature; everything ships in the same
 > 0.7.0 publish.
 
+### Added — feature-gated allocation counter (#58)
+
+- **New feature `alloc-counters`** (default off). Exposes
+  `CountingAllocator` and `AllocSnapshot` at the
+  crate root, layering four `AtomicU64` counters (`allocs`,
+  `deallocs`, `bytes_allocated`, `bytes_deallocated`) on top of any
+  inner allocator. Bench / test binaries opt in by installing the
+  wrapper as `#[global_allocator]`.
+- **Bench `alloc_count`** at `benches/order_book/alloc_count.rs`
+  (also feature-gated) runs the mixed 70 / 20 / 10 workload, prints
+  `allocs_per_op` + `bytes_alloc/op` to stdout, and writes a small
+  markdown summary to `target/alloc-counters/<scenario>.md`.
+- **Integration test `alloc_budget`** at
+  `tests/alloc_budget.rs` runs 10 000 mixed ops and
+  asserts `allocs/op < 10` — a conservative ceiling tuned to catch
+  order-of-magnitude regressions in CI, not to certify zero.
+- **`BENCH.md`** gains an "Allocation profile" section with the
+  workflow + a reference number from a single M4 Max run.
+- **`mod utils` made `pub mod utils`** so the new types are
+  reachable via `orderbook_rs::utils::CountingAllocator` as well as
+  the crate-root re-export. Existing `pub use utils::current_time_millis`
+  unchanged.
+
+### Notes — alloc counter
+
+- The library `rlib` does **not** install a `#[global_allocator]` —
+  consumers pick their own (`jemalloc`, `mimalloc`, system, …). The
+  wrapper exists to give bench / test binaries a measurement hook
+  without forcing a global choice on the library.
+- `counting_allocator.rs` carries a documented
+  `#[allow(unsafe_code)]` exception to the crate's
+  `#![deny(unsafe_code)]` policy because Rust's `GlobalAlloc` trait
+  requires `unsafe impl`. The exception is gated on the feature flag
+  and confined to the wrapper module; every `unsafe` block
+  delegates immediately to the inner allocator.
+
 ### Added — HDR-histogram tail-latency bench suite (#56)
 
 - **Six new bench binaries** under `benches/order_book/*_hdr.rs` that
diff --git a/Cargo.toml b/Cargo.toml
index 0ad2112..4b93ac4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -55,6 +55,7 @@ special_orders = []
 nats = ["dep:async-nats", "dep:bytes"]
 bincode = ["dep:bincode"]
 journal = ["dep:crc32fast", "dep:memmap2"]
+alloc-counters = []
 
 [dev-dependencies]
 criterion = { version = "0.8", features = ["html_reports"] }
@@ -98,10 +99,21 @@ name = "mass_cancel_burst_hdr"
 path = "benches/order_book/mass_cancel_burst_hdr.rs"
 harness = false
 
+[[bench]]
+name = "alloc_count"
+path = "benches/order_book/alloc_count.rs"
+harness = false
+required-features = ["alloc-counters"]
+
 [[test]]
 name = "tests"
 path = "tests/unit/mod.rs"
 
+[[test]]
+name = "alloc_budget"
+path = "tests/alloc_budget.rs"
+required-features = ["alloc-counters"]
+
 [lib]
 name = "orderbook_rs"

diff --git a/README.md b/README.md
index 00582b5..8e99024 100644
--- a/README.md
+++ b/README.md
@@ -48,6 +48,22 @@ This order book engine is built with the following design principles:
 
 ### What's New in Version 0.7.0
 
+#### v0.7.0 — Feature-gated allocation counter
+
+- **New feature `alloc-counters`** (default off). Exposes
+  `CountingAllocator` and `AllocSnapshot` at the crate root.
+  Wraps any inner `GlobalAlloc` (`std::alloc::GlobalAlloc`) and
+  tracks four `AtomicU64` counters: `allocs`, `deallocs`,
+  `bytes_allocated`, `bytes_deallocated`.
+- Bench / test binaries opt in via
+  `#[global_allocator] static A: CountingAllocator = ...`.
+  The library `rlib` does **not** install a global allocator.
+- **`alloc_count`** bench + **`alloc_budget`** integration
+  test run the mixed 70/20/10 workload; the bench reports
+  `allocs_per_op`, the test asserts a conservative ceiling for
+  regression detection.
+- **`BENCH.md`** gains an "Allocation profile" section.
+
 #### v0.7.0 — HDR-histogram tail-latency bench suite
 
 - **Six new `*_hdr` bench binaries** under
diff --git a/benches/order_book/alloc_count.rs b/benches/order_book/alloc_count.rs
new file mode 100644
index 0000000..c763ccd
--- /dev/null
+++ b/benches/order_book/alloc_count.rs
@@ -0,0 +1,135 @@
+// alloc_count — feature-gated allocation profile of the mixed
+// 70/20/10 hot-path workload. Reports `allocs_per_op` and a
+// per-counter delta over a measurement window.
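+//
+// Measurement is a plain snapshot delta around the measured loop in
+// `main` below:
+//
+//     let before = GLOBAL.snapshot();
+//     /* MEASURED_OPS mixed ops */
+//     let delta = GLOBAL.snapshot().since(before);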
+//
+// Build / run:
+//
+//     cargo bench --features alloc-counters --bench alloc_count
+
+#![cfg(feature = "alloc-counters")]
+
+#[path = "hdr_common.rs"]
+mod common;
+
+use orderbook_rs::utils::CountingAllocator;
+use std::alloc::System;
+
+#[global_allocator]
+static GLOBAL: CountingAllocator = CountingAllocator::new(System);
+
+use common::{Rng, pick_owner, pick_side};
+use pricelevel::{Id, TimeInForce};
+
+const SCENARIO: &str = "alloc_count_mixed_70_20_10";
+const WARMUP_OPS: u64 = 200_000;
+const MEASURED_OPS: u64 = 1_000_000;
+const SEED: u64 = 0xA5A5_A5A5_A5A5_A5A5;
+
+#[derive(Clone, Copy)]
+enum Op {
+    Submit,
+    Cancel,
+    Aggressive,
+}
+
+fn pick_op(rng: &mut Rng) -> Op {
+    let v = rng.next() % 100;
+    if v < 70 {
+        Op::Submit
+    } else if v < 90 {
+        Op::Cancel
+    } else {
+        Op::Aggressive
+    }
+}
+
+fn apply(book: &orderbook_rs::OrderBook<()>, rng: &mut Rng, next_id: &mut u64, op: Op) {
+    match op {
+        Op::Submit => {
+            let id = Id::from_u64(*next_id);
+            *next_id += 1;
+            let price = rng.range(common::PRICE_LO, common::PRICE_HI) as u128;
+            let qty = rng.range(common::QTY_LO, common::QTY_HI);
+            let _ = book.add_limit_order_with_user(
+                id,
+                price,
+                qty,
+                pick_side(rng),
+                TimeInForce::Gtc,
+                pick_owner(rng),
+                None,
+            );
+        }
+        Op::Cancel => {
+            if *next_id > 1 {
+                let target = rng.range(1, *next_id - 1);
+                let _ = book.cancel_order(Id::from_u64(target));
+            }
+        }
+        Op::Aggressive => {
+            let id = Id::from_u64(*next_id);
+            *next_id += 1;
+            let qty = rng.range(1, 10);
+            let _ = book.submit_market_order_with_user(id, qty, pick_side(rng), pick_owner(rng));
+        }
+    }
+}
+
+fn main() {
+    let book = common::fresh_book();
+    let mut rng = Rng::new(SEED);
+    let mut next_id: u64 = 1;
+
+    // Warmup — discarded.
+    for _ in 0..WARMUP_OPS {
+        let op = pick_op(&mut rng);
+        apply(&book, &mut rng, &mut next_id, op);
+    }
+
+    // Capture pre-measurement counters.
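+    // Warmup allocations (e.g. `DashMap` shard growth while the book
+    // fills) happened before this snapshot, so they drop out of the
+    // `after.since(before)` delta reported below.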
+    let before = GLOBAL.snapshot();
+
+    for _ in 0..MEASURED_OPS {
+        let op = pick_op(&mut rng);
+        apply(&book, &mut rng, &mut next_id, op);
+    }
+
+    let after = GLOBAL.snapshot();
+    let delta = after.since(before);
+
+    let allocs_per_op = delta.allocs as f64 / MEASURED_OPS as f64;
+    let bytes_per_op = delta.bytes_allocated as f64 / MEASURED_OPS as f64;
+
+    println!("scenario       : {SCENARIO}");
+    println!("warmup ops     : {WARMUP_OPS}");
+    println!("measured ops   : {MEASURED_OPS}");
+    println!("allocs         : {}", delta.allocs);
+    println!("deallocs       : {}", delta.deallocs);
+    println!("bytes_alloc    : {}", delta.bytes_allocated);
+    println!("bytes_dealloc  : {}", delta.bytes_deallocated);
+    println!("allocs/op      : {allocs_per_op:.4}");
+    println!("bytes_alloc/op : {bytes_per_op:.2}");
+
+    let summary = format!(
+        "# {SCENARIO}\n\
+         \n\
+         | counter         | value                |\n\
+         |-----------------|----------------------|\n\
+         | warmup_ops      | {WARMUP_OPS}         |\n\
+         | measured_ops    | {MEASURED_OPS}       |\n\
+         | allocs          | {}                   |\n\
+         | deallocs        | {}                   |\n\
+         | bytes_alloc     | {}                   |\n\
+         | bytes_dealloc   | {}                   |\n\
+         | allocs/op       | {allocs_per_op:.4}   |\n\
+         | bytes_alloc/op  | {bytes_per_op:.2}    |\n",
+        delta.allocs, delta.deallocs, delta.bytes_allocated, delta.bytes_deallocated,
+    );
+    let _ = std::fs::create_dir_all("target/alloc-counters");
+    let path = format!("target/alloc-counters/{SCENARIO}.md");
+    if let Err(e) = std::fs::write(&path, summary) {
+        eprintln!("could not write {path}: {e}");
+    } else {
+        eprintln!("wrote {path}");
+    }
+}
diff --git a/benches/order_book/mixed_70_20_10_hdr.rs b/benches/order_book/mixed_70_20_10_hdr.rs
index f783baf..7df1355 100644
--- a/benches/order_book/mixed_70_20_10_hdr.rs
+++ b/benches/order_book/mixed_70_20_10_hdr.rs
@@ -20,10 +20,13 @@ enum Op {
 }
 
 fn pick_op(rng: &mut Rng) -> Op {
-    match rng.next() % 100 {
-        0..70 => Op::Submit,
-        70..90 => Op::Cancel,
-        _ => Op::Aggressive,
+    let v = rng.next() % 100;
+    if v < 70 {
+        Op::Submit
+    } else if v < 90 {
+        Op::Cancel
+    } else {
+        Op::Aggressive
     }
 }
 
diff --git a/src/lib.rs b/src/lib.rs
index c61d0bc..f54cc90 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -34,6 +34,22 @@
 //!
 //! ## What's New in Version 0.7.0
 //!
+//! ### v0.7.0 — Feature-gated allocation counter
+//!
+//! - **New feature `alloc-counters`** (default off). Exposes
+//!   [`CountingAllocator`] and [`AllocSnapshot`] at the crate root.
+//!   Wraps any inner [`GlobalAlloc`](std::alloc::GlobalAlloc) and
+//!   tracks four `AtomicU64` counters: `allocs`, `deallocs`,
+//!   `bytes_allocated`, `bytes_deallocated`.
+//! - Bench / test binaries opt in via
+//!   `#[global_allocator] static A: CountingAllocator = ...`.
+//!   The library `rlib` does **not** install a global allocator.
+//! - **`alloc_count`** bench + **`alloc_budget`** integration
+//!   test run the mixed 70/20/10 workload; the bench reports
+//!   `allocs_per_op`, the test asserts a conservative ceiling for
+//!   regression detection.
+//! - **`BENCH.md`** gains an "Allocation profile" section.
+//!
 //! ### v0.7.0 — HDR-histogram tail-latency bench suite
 //!
 //! - **Six new `*_hdr` bench binaries** under
@@ -395,7 +411,7 @@ pub mod orderbook;
 
 pub mod prelude;
 
-mod utils;
+pub mod utils;
 
 #[cfg(feature = "bincode")]
 pub use orderbook::BincodeEventSerializer;
@@ -431,6 +447,8 @@ pub use orderbook::{
     FeeSchedule, ManagerError, MassCancelResult, OrderBook, OrderBookError, OrderBookSnapshot,
 };
 pub use utils::current_time_millis;
+#[cfg(feature = "alloc-counters")]
+pub use utils::{AllocSnapshot, CountingAllocator};
 
 /// Legacy type alias for `OrderBook<()>` to maintain backward compatibility.
 ///
diff --git a/src/utils/counting_allocator.rs b/src/utils/counting_allocator.rs
new file mode 100644
index 0000000..6991fec
--- /dev/null
+++ b/src/utils/counting_allocator.rs
@@ -0,0 +1,186 @@
+//! Process-global counting allocator for hot-path allocation budgeting.
+//!
+//! Behind the `alloc-counters` feature flag. Wraps an inner
+//! [`GlobalAlloc`] implementation (`std::alloc::System` by default) and
+//! tracks four `AtomicU64` counters: total allocations, total
+//! deallocations, total bytes allocated, total bytes deallocated.
+//!
+//! ## Usage
+//!
+//! Bench / test binaries opt in by installing the allocator at the
+//! crate root:
+//!
+//! ```ignore
+//! use orderbook_rs::utils::CountingAllocator;
+//! use std::alloc::System;
+//!
+//! #[global_allocator]
+//! static A: CountingAllocator = CountingAllocator::new(System);
+//! ```
+//!
+//! and read the counters via [`CountingAllocator::allocs`] etc.
+//!
+//! The library's `rlib` itself does **not** install the allocator —
+//! consumers pick their own (`jemalloc`, `mimalloc`, system, …). The
+//! wrapper exists to give bench and budget-test binaries a measurement
+//! hook without forcing a global choice on the library.
+//!
+//! ## Why `unsafe`
+//!
+//! Implementing [`GlobalAlloc`] requires `unsafe impl` per Rust's
+//! allocator protocol. The crate's top-level `#![deny(unsafe_code)]`
+//! attribute would otherwise reject this module; `#[allow(unsafe_code)]`
+//! is applied here as the documented exception. The `unsafe` blocks
+//! exist only at the `GlobalAlloc` trait boundary (`alloc`, `dealloc`,
+//! `alloc_zeroed`, `realloc`); every block delegates immediately to
+//! the inner allocator after updating the counters.
+
+#![allow(unsafe_code)]
+
+use std::alloc::{GlobalAlloc, Layout, System};
+use std::sync::atomic::{AtomicU64, Ordering};
+
+/// Snapshot of the counters at a point in time.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
+pub struct AllocSnapshot {
+    /// Total `alloc` / `alloc_zeroed` calls observed since process
+    /// start.
+    pub allocs: u64,
+    /// Total `dealloc` calls observed since process start.
+    pub deallocs: u64,
+    /// Sum of `Layout::size()` across every observed allocation.
+    pub bytes_allocated: u64,
+    /// Sum of `Layout::size()` across every observed deallocation.
+    pub bytes_deallocated: u64,
+}
+
+impl AllocSnapshot {
+    /// Return the per-counter delta from `earlier` to `self` (e.g.
+    /// "allocs after warmup → allocs at end of measurement window").
+    #[inline]
+    #[must_use]
+    pub fn since(self, earlier: Self) -> Self {
+        Self {
+            allocs: self.allocs.saturating_sub(earlier.allocs),
+            deallocs: self.deallocs.saturating_sub(earlier.deallocs),
+            bytes_allocated: self.bytes_allocated.saturating_sub(earlier.bytes_allocated),
+            bytes_deallocated: self
+                .bytes_deallocated
+                .saturating_sub(earlier.bytes_deallocated),
+        }
+    }
+}
+
+/// Wrapping allocator that increments per-call counters before
+/// delegating to the inner allocator.
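+///
+/// Counter updates and reads use `Ordering::Relaxed`: each total is
+/// exact, but a snapshot taken while other threads allocate is not an
+/// atomically consistent cut across the four counters.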
+///
+/// `Inner` is typically `std::alloc::System`. `CountingAllocator` is a
+/// generic wrapper so callers can layer it on top of any custom
+/// allocator they already use.
+pub struct CountingAllocator<Inner = System> {
+    inner: Inner,
+    allocs: AtomicU64,
+    deallocs: AtomicU64,
+    bytes_allocated: AtomicU64,
+    bytes_deallocated: AtomicU64,
+}
+
+impl<Inner> CountingAllocator<Inner> {
+    /// Construct a new counting allocator wrapping `inner`. `const fn`
+    /// so it works as the initialiser of a `static` `#[global_allocator]`.
+    pub const fn new(inner: Inner) -> Self {
+        Self {
+            inner,
+            allocs: AtomicU64::new(0),
+            deallocs: AtomicU64::new(0),
+            bytes_allocated: AtomicU64::new(0),
+            bytes_deallocated: AtomicU64::new(0),
+        }
+    }
+
+    /// Total number of allocations observed since process start.
+    #[inline]
+    pub fn allocs(&self) -> u64 {
+        self.allocs.load(Ordering::Relaxed)
+    }
+
+    /// Total number of deallocations observed since process start.
+    #[inline]
+    pub fn deallocs(&self) -> u64 {
+        self.deallocs.load(Ordering::Relaxed)
+    }
+
+    /// Total bytes allocated since process start.
+    #[inline]
+    pub fn bytes_allocated(&self) -> u64 {
+        self.bytes_allocated.load(Ordering::Relaxed)
+    }
+
+    /// Total bytes deallocated since process start.
+    #[inline]
+    pub fn bytes_deallocated(&self) -> u64 {
+        self.bytes_deallocated.load(Ordering::Relaxed)
+    }
+
+    /// Capture the four counters into a single struct.
+    #[inline]
+    pub fn snapshot(&self) -> AllocSnapshot {
+        AllocSnapshot {
+            allocs: self.allocs(),
+            deallocs: self.deallocs(),
+            bytes_allocated: self.bytes_allocated(),
+            bytes_deallocated: self.bytes_deallocated(),
+        }
+    }
+}
+
+// SAFETY: `GlobalAlloc` is an unsafe trait. Each method below is
+// implemented as: increment a counter with `Ordering::Relaxed`, then
+// delegate to the inner allocator. The inner allocator's safety
+// requirements are forwarded verbatim — every `unsafe` block here only
+// calls into the inner allocator's `alloc` / `dealloc` / `realloc` /
+// `alloc_zeroed` with the same `layout` / `ptr` the caller passed to
+// us. The atomic counter writes are safe (no `unsafe` needed for
+// `fetch_add`).
+unsafe impl<Inner: GlobalAlloc> GlobalAlloc for CountingAllocator<Inner> {
+    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
+        self.allocs.fetch_add(1, Ordering::Relaxed);
+        self.bytes_allocated
+            .fetch_add(layout.size() as u64, Ordering::Relaxed);
+        // SAFETY: forwarded `layout` is whatever the caller supplied to
+        // `<Self as GlobalAlloc>::alloc`; the inner
+        // allocator's safety contract is the same.
+        unsafe { self.inner.alloc(layout) }
+    }
+
+    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
+        self.deallocs.fetch_add(1, Ordering::Relaxed);
+        self.bytes_deallocated
+            .fetch_add(layout.size() as u64, Ordering::Relaxed);
+        // SAFETY: the caller of `<Self as GlobalAlloc>::dealloc`
+        // already promised `ptr` was returned by a prior `alloc` /
+        // `alloc_zeroed` / `realloc` on the same allocator instance.
+        unsafe { self.inner.dealloc(ptr, layout) }
+    }
+
+    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
+        self.allocs.fetch_add(1, Ordering::Relaxed);
+        self.bytes_allocated
+            .fetch_add(layout.size() as u64, Ordering::Relaxed);
+        // SAFETY: same as `alloc`.
+        unsafe { self.inner.alloc_zeroed(layout) }
+    }
+
+    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
+        // Realloc counts as one alloc + one dealloc with size deltas.
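+        // Net effect: `allocs - deallocs` is unchanged, and
+        // `bytes_allocated - bytes_deallocated` moves by exactly
+        // `new_size - layout.size()`, so live-byte estimates stay
+        // consistent across reallocations.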
+        self.allocs.fetch_add(1, Ordering::Relaxed);
+        self.deallocs.fetch_add(1, Ordering::Relaxed);
+        self.bytes_allocated
+            .fetch_add(new_size as u64, Ordering::Relaxed);
+        self.bytes_deallocated
+            .fetch_add(layout.size() as u64, Ordering::Relaxed);
+        // SAFETY: forwarded `ptr` / `layout` / `new_size` are the
+        // caller's — the inner allocator's contract is the same.
+        unsafe { self.inner.realloc(ptr, layout, new_size) }
+    }
+}
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index bc8ccda..14bccc8 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -3,3 +3,9 @@ mod time;
 mod tests;
 
 pub use time::current_time_millis;
+
+#[cfg(feature = "alloc-counters")]
+pub mod counting_allocator;
+
+#[cfg(feature = "alloc-counters")]
+pub use counting_allocator::{AllocSnapshot, CountingAllocator};
diff --git a/tests/alloc_budget.rs b/tests/alloc_budget.rs
new file mode 100644
index 0000000..f9e7905
--- /dev/null
+++ b/tests/alloc_budget.rs
@@ -0,0 +1,100 @@
+//! Allocation-budget regression test for the mixed hot-path workload.
+//!
+//! Feature-gated on `alloc-counters`. Runs 10 000 mixed ops after a
+//! 1 000-op warmup and asserts the per-op allocation count stays
+//! below a conservative ceiling tuned to catch regressions, **not**
+//! to certify zero — `DashMap` + `SkipMap` allocate during bucket
+//! growth on early submissions and that is fine.
+//!
+//! The ceiling is intentionally loose so the test does not flake on
+//! shard-grow events or platform-specific allocator behaviour. A
+//! precise allocs-per-op floor belongs in the bench output; this
+//! integration test is the coarse CI guard.
+
+#![cfg(feature = "alloc-counters")]
+
+use orderbook_rs::OrderBook;
+use orderbook_rs::utils::CountingAllocator;
+use pricelevel::{Hash32, Id, Side, TimeInForce};
+use std::alloc::System;
+
+#[global_allocator]
+static GLOBAL: CountingAllocator = CountingAllocator::new(System);
+
+const WARMUP_OPS: u64 = 1_000;
+const MEASURED_OPS: u64 = 10_000;
+// Conservative ceiling. The mixed workload allocates per-op via
+// `DashMap` shard growth on early submissions plus per-resting-order
+// `Arc` allocations. Real engines hit ~1-2 allocs/op
+// amortised; this ceiling fires only on a 5x or worse regression.
+const ALLOCS_PER_OP_CEILING: f64 = 10.0;
+
+fn account(byte: u8) -> Hash32 {
+    let mut bytes = [0u8; 32];
+    bytes[0] = byte;
+    Hash32::new(bytes)
+}
+
+fn run_workload(book: &OrderBook<()>, count: u64, base: u64) {
+    let acct = account(1);
+    for i in 0..count {
+        let id = Id::from_u64(base + i);
+        let bucket = (base + i) % 5;
+        match bucket {
+            0..=2 => {
+                let _ = book.add_limit_order_with_user(
+                    id,
+                    100 + (bucket as u128),
+                    1 + (i % 10),
+                    Side::Buy,
+                    TimeInForce::Gtc,
+                    acct,
+                    None,
+                );
+            }
+            3 => {
+                let target = Id::from_u64(base + i.saturating_sub(1));
+                let _ = book.cancel_order(target);
+            }
+            _ => {
+                let _ = book.submit_market_order_with_user(id, 1, Side::Sell, acct);
+            }
+        }
+    }
+}
+
+#[test]
+fn alloc_budget_mixed_workload_stays_under_ceiling() {
+    let book = OrderBook::<()>::new("BUDGET");
+
+    // Seed liquidity so cancels and aggressive market orders find
+    // something to interact with.
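+    // The seeded asks sit at 100, at or below the workload's buy
+    // prices (100..=102), so submissions exercise the match path as
+    // well as pure inserts.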
+ for i in 0..50 { + let _ = book.add_limit_order_with_user( + Id::from_u64(1_000_000 + i), + 100, + 10, + Side::Sell, + TimeInForce::Gtc, + account(2), + None, + ); + } + + run_workload(&book, WARMUP_OPS, 1); + let before = GLOBAL.snapshot(); + run_workload(&book, MEASURED_OPS, WARMUP_OPS + 1); + let after = GLOBAL.snapshot(); + + let delta = after.since(before); + let allocs_per_op = delta.allocs as f64 / MEASURED_OPS as f64; + + assert!( + allocs_per_op < ALLOCS_PER_OP_CEILING, + "alloc-budget regression: {} allocs across {} ops = {:.4} allocs/op (ceiling {:.4})", + delta.allocs, + MEASURED_OPS, + allocs_per_op, + ALLOCS_PER_OP_CEILING, + ); +} diff --git a/tests/unit/replay_determinism.rs b/tests/unit/replay_determinism.rs index 65941db..897f728 100644 --- a/tests/unit/replay_determinism.rs +++ b/tests/unit/replay_determinism.rs @@ -5,7 +5,10 @@ #[cfg(feature = "journal")] mod replay_determinism { - use orderbook_rs::orderbook::sequencer::{InMemoryJournal, Journal, ReplayEngine, snapshots_match, SequencerCommand, SequencerEvent, SequencerResult}; + use orderbook_rs::orderbook::sequencer::{ + InMemoryJournal, Journal, ReplayEngine, SequencerCommand, SequencerEvent, SequencerResult, + snapshots_match, + }; use pricelevel::{Hash32, Id, OrderType, Price, Quantity, Side, TimeInForce, TimestampMs}; use proptest::prelude::*; @@ -59,7 +62,10 @@ mod replay_determinism { // Snapshots should match structurally (via snapshots_match oracle). let snap1 = book1.create_snapshot(usize::MAX); let snap2 = book2.create_snapshot(usize::MAX); - assert!(snapshots_match(&snap1, &snap2), "replayed snapshots should match"); + assert!( + snapshots_match(&snap1, &snap2), + "replayed snapshots should match" + ); } /// Proptest: random sequence of adds deterministically replays.