diff --git a/Cargo.lock b/Cargo.lock
index dd23ed1..2bc3703 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -158,7 +158,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "cortexadb-core"
-version = "0.1.8"
+version = "1.0.0"
dependencies = [
"arc-swap",
"bincode",
diff --git a/README.md b/README.md
index 62fff01..5e8ed27 100644
--- a/README.md
+++ b/README.md
@@ -10,8 +10,8 @@
-
-
+
+
@@ -82,23 +82,26 @@ pip install cortexadb[docs,pdf] # Optional: For PDF/Docx support
Technical Architecture & Benchmarks
-### Performance Benchmarks (v0.1.8)
+### Performance Benchmarks (v1.0.0)
-CortexaDB `v0.1.8` introduced a new batching architecture. Measured on an M2 Mac with 1,000 chunks of text:
+Measured on an M-series Mac — 10,000 embeddings × 384 dimensions.
-| Operation | v0.1.6 (Sync) | v0.1.8 (Batch) | Improvement |
-|-----------|---------------|----------------|-------------|
-| Ingestion | 12.4s | **0.12s** | **103x Faster** |
-| Memory Add| 15ms | 1ms | 15x Faster |
-| HNSW Search| 0.3ms | 0.28ms | - |
+| Operation | Latency / Time |
+|-----------|---------------|
+| Bulk Ingestion (1,000 chunks) | **0.12s** |
+| Single Memory Add | **1ms** |
+| HNSW Search p50 | **1.03ms** (debug) / ~0.3ms (release) |
+| HNSW Recall | **95%** |
+
+See the [full benchmark docs](https://cortexa-db.vercel.app/docs/resources/benchmarks) for HNSW vs Exact comparison and how to reproduce.
---
## License & Status
-CortexaDB is currently in **Beta (v0.1.8)**. It is released under the **MIT** and **Apache-2.0** licenses.
-We are actively refining the API and welcome feedback!
+CortexaDB `v1.0.0` is a **stable release** available under the **MIT** and **Apache-2.0** licenses.
+We welcome feedback and contributions!
---
> *CortexaDB — Because agents shouldn't have to choose between speed and a soul (memory).*
diff --git a/crates/cortexadb-core/Cargo.toml b/crates/cortexadb-core/Cargo.toml
index 3d9b329..defa20c 100644
--- a/crates/cortexadb-core/Cargo.toml
+++ b/crates/cortexadb-core/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "cortexadb-core"
-version = "0.1.8"
+version = "1.0.0"
edition = "2021"
authors = ["Anas Limem "]
description = "Fast, embedded vector + graph memory for AI agents"
diff --git a/crates/cortexadb-core/src/engine.rs b/crates/cortexadb-core/src/engine.rs
index 4041c3a..f5a613d 100644
--- a/crates/cortexadb-core/src/engine.rs
+++ b/crates/cortexadb-core/src/engine.rs
@@ -271,7 +271,7 @@ impl Engine {
match &cmd {
Command::Add(entry) => {
// Write entry to segment storage
- self._write_entry_to_segments(entry)?;
+ self.write_entry_to_segments(entry)?;
}
Command::Delete(id) => {
// Mark as deleted in segments
@@ -288,7 +288,7 @@ impl Engine {
// In relaxed modes caller flushes later via sync policy.
self.state_machine.apply_command(cmd)?;
- // 5. Update tracking
+ // 3. Update tracking
self.last_applied_id = cmd_id;
Ok(cmd_id)
@@ -425,8 +425,8 @@ impl Engine {
collection_bytes + content_bytes + embedding_bytes + metadata_bytes
}
- /// Helper: Write entry to segments
- fn _write_entry_to_segments(
+ /// Write entry to segments.
+ fn write_entry_to_segments(
&mut self,
entry: &crate::core::memory_entry::MemoryEntry,
) -> Result<()> {
@@ -439,10 +439,8 @@ impl Engine {
&self.state_machine
}
- /// Get mutable reference to the state machine
- /// NOTE: If you modify state directly (not via execute_command),
- /// you bypass WAL durability! Use execute_command() instead.
- pub fn get_state_machine_mut(&mut self) -> &mut StateMachine {
+ /// Get mutable reference to the state machine
+ pub(crate) fn get_state_machine_mut(&mut self) -> &mut StateMachine {
&mut self.state_machine
}
diff --git a/crates/cortexadb-core/src/lib.rs b/crates/cortexadb-core/src/lib.rs
index f965f10..e568c0f 100644
--- a/crates/cortexadb-core/src/lib.rs
+++ b/crates/cortexadb-core/src/lib.rs
@@ -9,5 +9,5 @@ pub mod store;
// Re-export the primary facade types for convenience.
pub use chunker::{chunk, ChunkMetadata, ChunkResult, ChunkingStrategy};
-pub use facade::{CortexaDB, CortexaDBConfig, CortexaDBError, Memory, Stats};
+pub use facade::{BatchRecord, CortexaDB, CortexaDBBuilder, CortexaDBConfig, CortexaDBError, Hit, Memory, Stats};
pub use index::{HnswBackend, HnswConfig, HnswError, IndexMode, MetricKind};
diff --git a/crates/cortexadb-py/cortexadb/client.py b/crates/cortexadb-py/cortexadb/client.py
index 6e139cf..a580e31 100644
--- a/crates/cortexadb-py/cortexadb/client.py
+++ b/crates/cortexadb-py/cortexadb/client.py
@@ -281,7 +281,8 @@ def search(
if self.get(target_id).collection not in collections:
continue
scored_candidates[target_id] = max(scored_candidates.get(target_id, 0), hit.score * 0.9)
- except: pass
+ except Exception:
+ pass
if recency_bias:
now = time.time()
@@ -291,7 +292,8 @@ def search(
age = max(0, now - mem.created_at)
decay = 0.5 ** (age / (30 * 86400))
scored_candidates[obj_id] *= (1.0 + 0.2 * decay)
- except: pass
+ except Exception:
+ pass
final = [Hit(mid, s) for mid, s in scored_candidates.items()]
final.sort(key=lambda h: h.score, reverse=True)
@@ -311,28 +313,33 @@ def export_replay(self, path: str):
"""Export all memories to a replay log."""
from .replay import ReplayWriter
writer = ReplayWriter(path, dimension=self._dimension)
- report = {"checked": 0, "exported": 0, "skipped_missing_embedding": 0}
-
- # This is a bit slow as we iterate all IDs
+ report = {"checked": 0, "exported": 0, "skipped_missing_embedding": 0, "errors": []}
+
stats = self.stats()
- for i in range(1, stats.entries + 1):
+ total_live = stats.entries
+ found = 0
+ mid = 1
+ scan_limit = max(total_live * 4, 1000)
+ while found < total_live and mid <= scan_limit:
report["checked"] += 1
try:
- mem = self.get(i)
+ mem = self.get(mid)
if mem.embedding:
writer.record_add(
id=mem.id,
text=bytes(mem.content).decode("utf-8") if mem.content else "",
embedding=mem.embedding,
collection=mem.collection,
- metadata=mem.metadata
+ metadata=mem.metadata,
)
report["exported"] += 1
else:
report["skipped_missing_embedding"] += 1
- except:
+ found += 1
+ except Exception:
pass
-
+ mid += 1
+
writer.close()
self._last_export_replay_report = report
diff --git a/crates/cortexadb-py/pyproject.toml b/crates/cortexadb-py/pyproject.toml
index da03786..3f6ae9e 100644
--- a/crates/cortexadb-py/pyproject.toml
+++ b/crates/cortexadb-py/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
[project]
name = "cortexadb"
-version = "0.1.8"
+version = "1.0.0"
requires-python = ">=3.9"
description = "Fast, embedded vector + graph memory for AI agents"
authors = [
diff --git a/docs/content/docs/resources/benchmarks.mdx b/docs/content/docs/resources/benchmarks.mdx
index 6f67aa3..2845759 100644
--- a/docs/content/docs/resources/benchmarks.mdx
+++ b/docs/content/docs/resources/benchmarks.mdx
@@ -1,22 +1,22 @@
---
title: Benchmarks
-description: Performance benchmarks and methodology
+description: Performance benchmarks and methodology for v1.0.0
---
-CortexaDB delivers sub-millisecond query latency and rapid ingestion, optimized for local agentic workflows.
+CortexaDB delivers fast, local vector search optimized for AI agent memory workloads. Numbers below are from a **debug build** on an M-series Mac — a release build is 5–10x faster.
## Performance Overview
-Key metrics measured with **10,000 embeddings** (384 dimensions) on an M1 Pro Mac.
+Key metrics measured with **10,000 embeddings** (384 dimensions) on an M-series Mac, v1.0.0 debug build.
- }>
- Batch ingestion processed 1,000 chunks in **0.12s** (formerly 12.4s).
+ }>
+ HNSW search on 10,000 vectors. Release build achieves **~0.3ms** p50.
- }>
- High-throughput HNSW search with sub-millisecond p50 latency.
+ }>
+ HNSW throughput (debug build). Release build exceeds **3,000 QPS**.
- }>
+ }>
Approximate search maintains high accuracy relative to brute-force.
@@ -25,47 +25,59 @@ Key metrics measured with **10,000 embeddings** (384 dimensions) on an M1 Pro Ma
## Retrieval Benchmarks
-| Mode | Latency (p50) | Throughput | Recall | Index Time |
-|------|---------------|------------|--------|------------|
-| **HNSW** | **0.29ms** | **3,203 QPS** | 95% | 151s |
-| Exact | 1.34ms | 690 QPS | 100% | 138s |
+Measured: 10,000 embeddings × 384 dimensions, 1,000 queries, 100 warmup, top-10.
-## Ingestion Benchmarks
+| Mode | p50 | p95 | p99 | Throughput | Recall | Disk |
+|------|-----|-----|-----|-----------|--------|------|
+| **HNSW** | **1.03ms** | 1.18ms | 1.29ms | **952 QPS** | **95%** | 47 MB |
+| Exact | 16.38ms | 22.69ms | 35.77ms | 56 QPS | 100% | 31 MB |
-| Operation | Previous | Current | Speedup |
-|-----------|----------|---------|---------|
-| **Bulk Ingest** | 12.4s | **0.12s** | **103x** |
-| Memory Add | 15ms | 1ms | 15x |
-| HNSW Build | 151s | 151s | - |
+> [!NOTE]
+> These numbers are from a **debug build** (`maturin develop`). With a release build (`maturin develop --release`), HNSW achieves **~0.3ms p50** and **3,000+ QPS** — consistent with the ingestion benchmarks below.
+
+---
+
+## Ingestion
+
+| Operation | Time |
+|-----------|------|
+| Bulk Ingest (1,000 chunks) | **0.12s** |
+| Single Memory Add | **1ms** |
+| HNSW Index Build (10,000 vectors) | ~286s (debug) / ~140s (release) |
---
## Methodology
-- **Dataset**: 10,000 embeddings x 384 dimensions (Sentence-Transformers standard).
-- **Environment**: MacBook Pro M1 Pro (16-core GPU, 32GB RAM).
-- **Query Latency**: p50 measured across 1,000 queries after 100 warmup cycles.
-- **Recall**: Percentage of HNSW results identical to brute-force exact scan.
+- **Dataset**: 10,000 random embeddings × 384 dimensions.
+- **Environment**: M-series Mac. Debug build via `maturin develop`.
+- **Query Latency**: p50/p95/p99 measured across 1,000 queries after 100 warmup cycles.
+- **Recall**: % of HNSW results identical to brute-force exact scan (100 queries, top-10).
---
## Reproducing Results
-Build the release extension:
+Build the release extension for best performance:
```bash
cd crates/cortexadb-py
maturin develop --release
+cd ../..
+pip install numpy psutil
```
Run the automated benchmark suite:
```bash
# Generate 10k test vectors
-python benchmark/generate_embeddings.py --count 10000 --dimensions 384
+python3 benchmark/generate_embeddings.py --count 10000 --dimensions 384
# Benchmark HNSW performance
-python benchmark/run_benchmark.py --index-mode hnsw
+python3 benchmark/run_benchmark.py --index-mode hnsw
+
+# Benchmark Exact performance
+python3 benchmark/run_benchmark.py --index-mode exact
```
---
@@ -76,11 +88,11 @@ python benchmark/run_benchmark.py --index-mode hnsw
|--------|-----------|----------|
| **Dataset Size** | < 10,000 entries | > 10,000 entries |
| **Recall Needed** | 100% (Strict) | 95-99% (Semantic) |
-| **Latency Target** | < 5ms | < 1ms |
+| **Latency Target** | < 20ms (debug) / < 2ms (release) | < 5ms (debug) / < 1ms (release) |
| **Resource Profile** | Minimum Memory | High Performance |
> [!TIP]
-> For datasets between 1k and 10k, **Exact mode** is often faster due to zero index-building overhead while maintaining sub-millisecond latency on modern CPUs.
+> For datasets between 1k and 10k, **Exact mode** is often a good choice due to zero index-building overhead and 100% recall. HNSW shines at 10k+ entries where its sub-linear search complexity pays off.
---
diff --git a/docs/resources/benchmarks.md b/docs/resources/benchmarks.md
index 6398a11..c96591d 100644
--- a/docs/resources/benchmarks.md
+++ b/docs/resources/benchmarks.md
@@ -1,52 +1,67 @@
# Benchmarks
-CortexaDB has been benchmarked with **10,000 embeddings** at **384 dimensions** (typical sentence-transformer size).
+CortexaDB v1.0.0 benchmarked with **10,000 embeddings** at **384 dimensions** (typical sentence-transformer size) on an M-series Mac.
+
+> **Build mode note:** Numbers below are from a debug build. A release build (`maturin develop --release`) is 5–10x faster.
## Results
-| Mode | Indexing Time | Query (p50) | Throughput | Recall |
-|------|--------------|-------------|-----------|--------|
-| Exact (baseline) | 138s | 1.34ms | 690 QPS | 100% |
-| HNSW | 151s | 0.29ms | 3,203 QPS | 95% |
+| Mode | Index Time | p50 | p95 | p99 | Throughput | Recall |
+|------|-----------|-----|-----|-----|-----------|--------|
+| **HNSW** | 286s | **1.03ms** | 1.18ms | 1.29ms | **952 QPS** | **95%** |
+| Exact | 275s | 16.38ms | 22.69ms | 35.77ms | 56 QPS | 100% |
+
+**HNSW is ~16x faster than exact search (debug build) while maintaining 95% recall.**
+
+> With a release build (`maturin develop --release`), expect HNSW p50 ≈ 0.3ms and 3,000+ QPS.
+
+---
+
+## Disk Usage
-**HNSW is ~5x faster than exact search while maintaining 95% recall.**
+| Mode | Disk Size |
+|------|-----------|
+| HNSW | 47 MB |
+| Exact | 31 MB |
---
## Methodology
-- **Dataset**: 10,000 embeddings x 384 dimensions (realistic sentence-transformer size)
-- **Indexing**: Time to build fresh index from scratch
-- **Query Latency**: p50/p95/p99 measured across 1,000 queries (after 100 warmup queries)
-- **Recall**: Percentage of HNSW results that match brute-force exact search
+- **Dataset**: 10,000 random embeddings × 384 dimensions
+- **Environment**: M-series Mac, debug build via `maturin develop`
+- **Indexing**: Time to add 10,000 vectors + `checkpoint()` to flush
+- **Query Latency**: p50/p95/p99 across 1,000 queries after 100 warmup queries
+- **Recall**: % of HNSW results that match brute-force exact scan (100 queries, top-10)
---
-## Running Benchmarks
+## Reproducing Results
### Prerequisites
```bash
-# Build the Rust extension
+# Build the Rust extension (release mode for published numbers)
cd crates/cortexadb-py
maturin develop --release
cd ../..
+pip install numpy psutil
```
### Generate Test Data
```bash
-python benchmark/generate_embeddings.py --count 10000 --dimensions 384
+python3 benchmark/generate_embeddings.py --count 10000 --dimensions 384
```
### Run Benchmarks
```bash
# Exact mode (baseline, 100% recall)
-python benchmark/run_benchmark.py --index-mode exact
+python3 benchmark/run_benchmark.py --index-mode exact
# HNSW mode (fast, ~95% recall)
-python benchmark/run_benchmark.py --index-mode hnsw
+python3 benchmark/run_benchmark.py --index-mode hnsw
```
Results are saved to `benchmark/results/`.
@@ -54,7 +69,7 @@ Results are saved to `benchmark/results/`.
### Custom Options
```bash
-python benchmark/run_benchmark.py \
+python3 benchmark/run_benchmark.py \
--count 10000 \
--dimensions 384 \
--top-k 10 \
@@ -85,7 +100,7 @@ python benchmark/run_benchmark.py \
### When to Use HNSW
- Dataset over 10,000 entries
-- Sub-millisecond latency is needed
+- Sub-millisecond latency is needed (release build)
- 95%+ recall is acceptable
- High query throughput is needed