From 542318bf9309df900dc10bb8b90a48cc78e521fa Mon Sep 17 00:00:00 2001 From: anaslimem Date: Wed, 18 Mar 2026 21:50:39 +0100 Subject: [PATCH] Updated everything for the new stable release --- Cargo.lock | 2 +- README.md | 25 ++++---- crates/cortexadb-core/Cargo.toml | 2 +- crates/cortexadb-core/src/engine.rs | 14 ++--- crates/cortexadb-core/src/lib.rs | 2 +- crates/cortexadb-py/cortexadb/client.py | 27 +++++---- crates/cortexadb-py/pyproject.toml | 2 +- docs/content/docs/resources/benchmarks.mdx | 66 +++++++++++++--------- docs/resources/benchmarks.md | 49 ++++++++++------ 9 files changed, 112 insertions(+), 77 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dd23ed1..2bc3703 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -158,7 +158,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cortexadb-core" -version = "0.1.8" +version = "1.0.0" dependencies = [ "arc-swap", "bincode", diff --git a/README.md b/README.md index 62fff01..5e8ed27 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@

License - Status - Version + Status + Version Downloads Documentation

@@ -82,23 +82,26 @@ pip install cortexadb[docs,pdf] # Optional: For PDF/Docx support
Technical Architecture & Benchmarks -### Performance Benchmarks (v0.1.8) +### Performance Benchmarks (v1.0.0) -CortexaDB `v0.1.8` introduced a new batching architecture. Measured on an M2 Mac with 1,000 chunks of text: +Measured on an M-series Mac — 10,000 embeddings × 384 dimensions. -| Operation | v0.1.6 (Sync) | v0.1.8 (Batch) | Improvement | -|-----------|---------------|----------------|-------------| -| Ingestion | 12.4s | **0.12s** | **103x Faster** | -| Memory Add| 15ms | 1ms | 15x Faster | -| HNSW Search| 0.3ms | 0.28ms | - | +| Operation | Latency / Time | +|-----------|---------------| +| Bulk Ingestion (1,000 chunks) | **0.12s** | +| Single Memory Add | **1ms** | +| HNSW Search p50 | **1.03ms** (debug) / ~0.3ms (release) | +| HNSW Recall | **95%** | + +See the [full benchmark docs](https://cortexa-db.vercel.app/docs/resources/benchmarks) for HNSW vs Exact comparison and how to reproduce.
--- ## License & Status -CortexaDB is currently in **Beta (v0.1.8)**. It is released under the **MIT** and **Apache-2.0** licenses. -We are actively refining the API and welcome feedback! +CortexaDB `v1.0.0` is a **stable release** available under the **MIT** and **Apache-2.0** licenses. +We welcome feedback and contributions! --- > *CortexaDB — Because agents shouldn't have to choose between speed and a soul (memory).* diff --git a/crates/cortexadb-core/Cargo.toml b/crates/cortexadb-core/Cargo.toml index 3d9b329..defa20c 100644 --- a/crates/cortexadb-core/Cargo.toml +++ b/crates/cortexadb-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cortexadb-core" -version = "0.1.8" +version = "1.0.0" edition = "2021" authors = ["Anas Limem "] description = "Fast, embedded vector + graph memory for AI agents" diff --git a/crates/cortexadb-core/src/engine.rs b/crates/cortexadb-core/src/engine.rs index 4041c3a..f5a613d 100644 --- a/crates/cortexadb-core/src/engine.rs +++ b/crates/cortexadb-core/src/engine.rs @@ -271,7 +271,7 @@ impl Engine { match &cmd { Command::Add(entry) => { // Write entry to segment storage - self._write_entry_to_segments(entry)?; + self.write_entry_to_segments(entry)?; } Command::Delete(id) => { // Mark as deleted in segments @@ -288,7 +288,7 @@ impl Engine { // In relaxed modes caller flushes later via sync policy. self.state_machine.apply_command(cmd)?; - // 5. Update tracking + // 3. Update tracking self.last_applied_id = cmd_id; Ok(cmd_id) @@ -425,8 +425,8 @@ impl Engine { collection_bytes + content_bytes + embedding_bytes + metadata_bytes } - /// Helper: Write entry to segments - fn _write_entry_to_segments( + /// Write entry to segments. + fn write_entry_to_segments( &mut self, entry: &crate::core::memory_entry::MemoryEntry, ) -> Result<()> { @@ -439,10 +439,8 @@ impl Engine { &self.state_machine } - /// Get mutable reference to the state machine - /// NOTE: If you modify state directly (not via execute_command), - /// you bypass WAL durability! 
Use execute_command() instead. - pub fn get_state_machine_mut(&mut self) -> &mut StateMachine { + /// Get mutable reference to the state machine + pub(crate) fn get_state_machine_mut(&mut self) -> &mut StateMachine { &mut self.state_machine } diff --git a/crates/cortexadb-core/src/lib.rs b/crates/cortexadb-core/src/lib.rs index f965f10..e568c0f 100644 --- a/crates/cortexadb-core/src/lib.rs +++ b/crates/cortexadb-core/src/lib.rs @@ -9,5 +9,5 @@ pub mod store; // Re-export the primary facade types for convenience. pub use chunker::{chunk, ChunkMetadata, ChunkResult, ChunkingStrategy}; -pub use facade::{CortexaDB, CortexaDBConfig, CortexaDBError, Memory, Stats}; +pub use facade::{BatchRecord, CortexaDB, CortexaDBBuilder, CortexaDBConfig, CortexaDBError, Hit, Memory, Stats}; pub use index::{HnswBackend, HnswConfig, HnswError, IndexMode, MetricKind}; diff --git a/crates/cortexadb-py/cortexadb/client.py b/crates/cortexadb-py/cortexadb/client.py index 6e139cf..a580e31 100644 --- a/crates/cortexadb-py/cortexadb/client.py +++ b/crates/cortexadb-py/cortexadb/client.py @@ -281,7 +281,8 @@ def search( if self.get(target_id).collection not in collections: continue scored_candidates[target_id] = max(scored_candidates.get(target_id, 0), hit.score * 0.9) - except: pass + except Exception: + pass if recency_bias: now = time.time() @@ -291,7 +292,8 @@ def search( age = max(0, now - mem.created_at) decay = 0.5 ** (age / (30 * 86400)) scored_candidates[obj_id] *= (1.0 + 0.2 * decay) - except: pass + except Exception: + pass final = [Hit(mid, s) for mid, s in scored_candidates.items()] final.sort(key=lambda h: h.score, reverse=True) @@ -311,28 +313,33 @@ def export_replay(self, path: str): """Export all memories to a replay log.""" from .replay import ReplayWriter writer = ReplayWriter(path, dimension=self._dimension) - report = {"checked": 0, "exported": 0, "skipped_missing_embedding": 0} - - # This is a bit slow as we iterate all IDs + report = {"checked": 0, "exported": 0, 
"skipped_missing_embedding": 0, "errors": []} + stats = self.stats() - for i in range(1, stats.entries + 1): + total_live = stats.entries + found = 0 + mid = 1 + scan_limit = max(total_live * 4, 1000) + while found < total_live and mid <= scan_limit: report["checked"] += 1 try: - mem = self.get(i) + mem = self.get(mid) if mem.embedding: writer.record_add( id=mem.id, text=bytes(mem.content).decode("utf-8") if mem.content else "", embedding=mem.embedding, collection=mem.collection, - metadata=mem.metadata + metadata=mem.metadata, ) report["exported"] += 1 else: report["skipped_missing_embedding"] += 1 - except: + found += 1 + except Exception: pass - + mid += 1 + writer.close() self._last_export_replay_report = report diff --git a/crates/cortexadb-py/pyproject.toml b/crates/cortexadb-py/pyproject.toml index da03786..3f6ae9e 100644 --- a/crates/cortexadb-py/pyproject.toml +++ b/crates/cortexadb-py/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "cortexadb" -version = "0.1.8" +version = "1.0.0" requires-python = ">=3.9" description = "Fast, embedded vector + graph memory for AI agents" authors = [ diff --git a/docs/content/docs/resources/benchmarks.mdx b/docs/content/docs/resources/benchmarks.mdx index 6f67aa3..2845759 100644 --- a/docs/content/docs/resources/benchmarks.mdx +++ b/docs/content/docs/resources/benchmarks.mdx @@ -1,22 +1,22 @@ --- title: Benchmarks -description: Performance benchmarks and methodology +description: Performance benchmarks and methodology for v1.0.0 --- -CortexaDB delivers sub-millisecond query latency and rapid ingestion, optimized for local agentic workflows. +CortexaDB delivers fast, local vector search optimized for AI agent memory workloads. Numbers below are from a **debug build** on an M-series Mac — a release build is 5–10x faster. ## Performance Overview -Key metrics measured with **10,000 embeddings** (384 dimensions) on an M1 Pro Mac. 
+Key metrics measured with **10,000 embeddings** (384 dimensions) on an M-series Mac, v1.0.0 debug build. - }> - Batch ingestion processed 1,000 chunks in **0.12s** (formerly 12.4s). + }> + HNSW search on 10,000 vectors. Release build achieves **~0.3ms** p50. - }> - High-throughput HNSW search with sub-millisecond p50 latency. + }> + HNSW throughput (debug build). Release build exceeds **3,000 QPS**. - }> + }> Approximate search maintains high accuracy relative to brute-force. @@ -25,47 +25,59 @@ Key metrics measured with **10,000 embeddings** (384 dimensions) on an M1 Pro Ma ## Retrieval Benchmarks -| Mode | Latency (p50) | Throughput | Recall | Index Time | -|------|---------------|------------|--------|------------| -| **HNSW** | **0.29ms** | **3,203 QPS** | 95% | 151s | -| Exact | 1.34ms | 690 QPS | 100% | 138s | +Measured: 10,000 embeddings × 384 dimensions, 1,000 queries, 100 warmup, top-10. -## Ingestion Benchmarks +| Mode | p50 | p95 | p99 | Throughput | Recall | Disk | +|------|-----|-----|-----|-----------|--------|------| +| **HNSW** | **1.03ms** | 1.18ms | 1.29ms | **952 QPS** | **95%** | 47 MB | +| Exact | 16.38ms | 22.69ms | 35.77ms | 56 QPS | 100% | 31 MB | -| Operation | Previous | Current | Speedup | -|-----------|----------|---------|---------| -| **Bulk Ingest** | 12.4s | **0.12s** | **103x** | -| Memory Add | 15ms | 1ms | 15x | -| HNSW Build | 151s | 151s | - | +> [!NOTE] +> These numbers are from a **debug build** (`maturin develop`). With a release build (`maturin develop --release`), HNSW achieves **~0.3ms p50** and **3,000+ QPS** — consistent with the ingestion benchmarks below. + +--- + +## Ingestion + +| Operation | Time | +|-----------|------| +| Bulk Ingest (1,000 chunks) | **0.12s** | +| Single Memory Add | **1ms** | +| HNSW Index Build (10,000 vectors) | ~286s (debug) / ~140s (release) | --- ## Methodology -- **Dataset**: 10,000 embeddings x 384 dimensions (Sentence-Transformers standard). 
-- **Environment**: MacBook Pro M1 Pro (16-core GPU, 32GB RAM). -- **Query Latency**: p50 measured across 1,000 queries after 100 warmup cycles. -- **Recall**: Percentage of HNSW results identical to brute-force exact scan. +- **Dataset**: 10,000 random embeddings × 384 dimensions. +- **Environment**: M-series Mac. Debug build via `maturin develop`. +- **Query Latency**: p50/p95/p99 measured across 1,000 queries after 100 warmup cycles. +- **Recall**: % of HNSW results identical to brute-force exact scan (100 queries, top-10). --- ## Reproducing Results -Build the release extension: +Build the release extension for best performance: ```bash cd crates/cortexadb-py maturin develop --release +cd ../.. +pip install numpy psutil ``` Run the automated benchmark suite: ```bash # Generate 10k test vectors -python benchmark/generate_embeddings.py --count 10000 --dimensions 384 +python3 benchmark/generate_embeddings.py --count 10000 --dimensions 384 # Benchmark HNSW performance -python benchmark/run_benchmark.py --index-mode hnsw +python3 benchmark/run_benchmark.py --index-mode hnsw + +# Benchmark Exact performance +python3 benchmark/run_benchmark.py --index-mode exact ``` --- @@ -76,11 +88,11 @@ python benchmark/run_benchmark.py --index-mode hnsw |--------|-----------|----------| | **Dataset Size** | < 10,000 entries | > 10,000 entries | | **Recall Needed** | 100% (Strict) | 95-99% (Semantic) | -| **Latency Target** | < 5ms | < 1ms | +| **Latency Target** | < 20ms (debug) / < 2ms (release) | < 5ms (debug) / < 1ms (release) | | **Resource Profile** | Minimum Memory | High Performance | > [!TIP] -> For datasets between 1k and 10k, **Exact mode** is often faster due to zero index-building overhead while maintaining sub-millisecond latency on modern CPUs. +> For datasets between 1k and 10k, **Exact mode** is often a good choice due to zero index-building overhead and 100% recall. HNSW shines at 10k+ entries where its sub-linear search complexity pays off. 
--- diff --git a/docs/resources/benchmarks.md b/docs/resources/benchmarks.md index 6398a11..c96591d 100644 --- a/docs/resources/benchmarks.md +++ b/docs/resources/benchmarks.md @@ -1,52 +1,67 @@ # Benchmarks -CortexaDB has been benchmarked with **10,000 embeddings** at **384 dimensions** (typical sentence-transformer size). +CortexaDB v1.0.0 benchmarked with **10,000 embeddings** at **384 dimensions** (typical sentence-transformer size) on an M-series Mac. + +> **Build mode note:** Numbers below are from a debug build. A release build (`maturin develop --release`) is 5–10x faster. ## Results -| Mode | Indexing Time | Query (p50) | Throughput | Recall | -|------|--------------|-------------|-----------|--------| -| Exact (baseline) | 138s | 1.34ms | 690 QPS | 100% | -| HNSW | 151s | 0.29ms | 3,203 QPS | 95% | +| Mode | Index Time | p50 | p95 | p99 | Throughput | Recall | +|------|-----------|-----|-----|-----|-----------|--------| +| **HNSW** | 286s | **1.03ms** | 1.18ms | 1.29ms | **952 QPS** | **95%** | +| Exact | 275s | 16.38ms | 22.69ms | 35.77ms | 56 QPS | 100% | + +**HNSW is ~16x faster than exact search (debug build) while maintaining 95% recall.** + +> With a release build (`maturin develop --release`), expect HNSW p50 ≈ 0.3ms and 3,000+ QPS. 
+ +--- + +## Disk Usage -**HNSW is ~5x faster than exact search while maintaining 95% recall.** +| Mode | Disk Size | +|------|-----------| +| HNSW | 47 MB | +| Exact | 31 MB | --- ## Methodology -- **Dataset**: 10,000 embeddings x 384 dimensions (realistic sentence-transformer size) -- **Indexing**: Time to build fresh index from scratch -- **Query Latency**: p50/p95/p99 measured across 1,000 queries (after 100 warmup queries) -- **Recall**: Percentage of HNSW results that match brute-force exact search +- **Dataset**: 10,000 random embeddings × 384 dimensions +- **Environment**: M-series Mac, debug build via `maturin develop` +- **Indexing**: Time to add 10,000 vectors + `checkpoint()` to flush +- **Query Latency**: p50/p95/p99 across 1,000 queries after 100 warmup queries +- **Recall**: % of HNSW results that match brute-force exact scan (100 queries, top-10) --- -## Running Benchmarks +## Reproducing Results ### Prerequisites ```bash -# Build the Rust extension +# Build the Rust extension (release mode for best performance; the numbers above are from a debug build) cd crates/cortexadb-py maturin develop --release cd ../.. +pip install numpy psutil ``` ### Generate Test Data ```bash -python benchmark/generate_embeddings.py --count 10000 --dimensions 384 +python3 benchmark/generate_embeddings.py --count 10000 --dimensions 384 ``` ### Run Benchmarks ```bash # Exact mode (baseline, 100% recall) -python benchmark/run_benchmark.py --index-mode exact +python3 benchmark/run_benchmark.py --index-mode exact # HNSW mode (fast, ~95% recall) -python benchmark/run_benchmark.py --index-mode hnsw +python3 benchmark/run_benchmark.py --index-mode hnsw ``` Results are saved to `benchmark/results/`. @@ -54,7 +69,7 @@ Results are saved to `benchmark/results/`. 
### Custom Options ```bash -python benchmark/run_benchmark.py \ +python3 benchmark/run_benchmark.py \ --count 10000 \ --dimensions 384 \ --top-k 10 \ @@ -85,7 +100,7 @@ python benchmark/run_benchmark.py \ ### When to Use HNSW - Dataset over 10,000 entries -- Sub-millisecond latency is needed +- Sub-millisecond latency is needed (release build) - 95%+ recall is acceptable - High query throughput is needed