1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -34,3 +34,4 @@ python/**/__pycache__/
pip-*/
build-env-*/
build-reqs-*.txt
.codex
30 changes: 30 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,35 @@
# Changelog

## 0.22.0 - 2026-04-28

### Performance

- Encode response time: 24.7ms to 15.2ms p50, about 40% faster.
- Cold-start first encode: 525ms to 28ms with warmup, about 18.7x faster.
- Hybrid recall: 30.2ms to 14.3ms p50, about 2.1x faster.
- Eliminated 3 of 4 redundant embedding calls during encode. Validation, interference, and affect resonance now reuse the main content vector.
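
  The reuse pattern behind that last item can be sketched in a few lines (hypothetical function names, not Audrey's internals): compute the content vector once, then pass it to each downstream stage instead of re-embedding. Real providers are async; a synchronous mock keeps the sketch small.

  ```javascript
  // Sketch: one embedding call shared across downstream stages (hypothetical names).
  let embedCalls = 0;

  // Stand-in for a real embedding provider.
  function embed(text) {
    embedCalls += 1;
    return Array.from(text, ch => ch.charCodeAt(0) / 255);
  }

  // Stages receive the precomputed vector instead of re-embedding the content.
  const validate = vec => vec.length > 0;
  const checkInterference = vec => vec.some(v => v > 0.9);
  const scoreAffectResonance = vec => vec.reduce((a, b) => a + b, 0) / vec.length;

  function encode(content) {
    const contentVector = embed(content); // the only embedding call
    validate(contentVector);
    checkInterference(contentVector);
    scoreAffectResonance(contentVector);
    return contentVector;
  }

  encode('example memory');
  console.log(embedCalls); // 1 embedding call, where this pipeline shape previously made 4
  ```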

### Added

- Added `memory_encode.wait_for_consolidation` parameter, default `false`, for opt-in read-after-write semantics.
- Added `memory_recall.retrieval` parameter with `"hybrid"` default, `"vector"`, and `"hybrid_strict"` modes.
- Added `pending_consolidation_count`, `embedding_warm`, `warmup_duration_ms`, and `default_retrieval_mode` to `memory_status`.
- Added background embedding pipeline warmup after MCP `server.connect()`.
- Added `AUDREY_PROFILE=1` for per-stage timings in MCP `_meta.diagnostics`.
- Added `AUDREY_DISABLE_WARMUP=1` to opt out of background embedding warmup.
- Added `benchmarks/perf.bench.js` and `npm run bench:perf` as a mock-embedding CI perf gate.
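
  As a usage sketch, the new encode and recall parameters map onto MCP tool calls roughly like this. The `wait_for_consolidation` and `retrieval` argument names come from the entries above; the surrounding `tools/call` framing is standard MCP request shape, and the content strings are illustrative.

  ```javascript
  // Hypothetical MCP tool-call payloads; only the argument names are from the changelog.
  const encodeRequest = {
    method: 'tools/call',
    params: {
      name: 'memory_encode',
      arguments: {
        content: 'Deploy window moved to Friday.',
        wait_for_consolidation: true, // opt in to read-after-write semantics
      },
    },
  };

  const recallRequest = {
    method: 'tools/call',
    params: {
      name: 'memory_recall',
      arguments: {
        query: 'deploy window',
        retrieval: 'hybrid_strict', // or 'vector'; 'hybrid' is the default
      },
    },
  };

  console.log(encodeRequest.params.name, recallRequest.params.arguments.retrieval);
  ```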

### Changed

- Moved post-encode validation, interference, and affect resonance onto a serialized async queue so `memory_encode` no longer blocks on downstream consolidation work by default.
- Folded recall's three healthy-store vec-table count queries into one SQL roundtrip before KNN.
- Process shutdown now drains the post-encode consolidation queue with a 5-second timeout and logs pending row IDs if work remains.
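
  The serialized queue plus timed drain described above can be sketched as follows (a minimal illustration under assumed names, not Audrey's implementation; assumes an ES module context for top-level `await`):

  ```javascript
  // Minimal serialized async queue: tasks run one at a time, in submission order.
  class SerialQueue {
    constructor() {
      this.tail = Promise.resolve(); // promise chain that serializes all work
      this.pending = new Set();      // ids of tasks not yet finished
    }

    push(id, task) {
      this.pending.add(id);
      this.tail = this.tail
        .then(task)
        .catch(err => console.error(`task ${id} failed:`, err))
        .finally(() => this.pending.delete(id));
    }

    // Resolves { drained: true } once the chain empties, or
    // { drained: false, pendingIds } after timeoutMs.
    async drain(timeoutMs) {
      let timer;
      const timeout = new Promise(resolve => {
        timer = setTimeout(() => resolve('timeout'), timeoutMs);
      });
      const winner = await Promise.race([this.tail.then(() => 'done'), timeout]);
      clearTimeout(timer);
      return winner === 'done'
        ? { drained: true, pendingIds: [] }
        : { drained: false, pendingIds: [...this.pending] };
    }
  }

  const queue = new SerialQueue();
  const order = [];
  queue.push('mem-1', async () => { order.push('mem-1'); });
  queue.push('mem-2', async () => { order.push('mem-2'); });

  const result = await queue.drain(5000);
  console.log(result.drained, order);
  ```

  In the real flow the tasks would be the validation, interference, and affect-resonance passes, with shutdown calling the drain under the 5-second budget and logging whatever pending IDs remain.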

### Internal

- Added `src/profile.ts` with `ProfileRecorder`.
- Added `encodeWithDiagnostics()` and `recallWithDiagnostics()` for MCP profiling-mode response metadata.

## 0.21.0 - Release Diagnostics and Host Setup

- Added `npx audrey doctor` for first-contact diagnostics, JSON automation, provider checks, MCP entrypoint validation, memory-store health, and host config generation.
1 change: 0 additions & 1 deletion CONTRIBUTING.md
@@ -40,7 +40,6 @@ Node `>=20` is required.
If you update the README, examples, CLI behavior, or production guidance, keep those surfaces aligned:

- `README.md`
- `docs/production-readiness.md`
- `examples/`

## Reporting Problems
11 changes: 2 additions & 9 deletions README.md
@@ -209,8 +209,6 @@ Production controls you still own:
- Run `npx audrey dream` on a schedule so consolidation and decay stay current.
- Add application-level encryption, retention, access control, and audit logging for regulated environments.

Read the full guide: [docs/production-readiness.md](docs/production-readiness.md).

## Benchmarks

Audrey ships with a benchmark harness and release gate:
@@ -224,7 +222,7 @@ Current repo snapshot:

![Audrey local benchmark](docs/assets/benchmarks/local-benchmark.svg)

The benchmark suite covers retrieval behavior, overwrite behavior, delete/abstain behavior, and semantic/procedural merge behavior. For methodology and comparison anchors, see [docs/benchmarking.md](docs/benchmarking.md).
The benchmark suite covers retrieval behavior, overwrite behavior, delete/abstain behavior, and semantic/procedural merge behavior.

## Command Reference

@@ -257,13 +255,8 @@ docker compose up -d --build

## Documentation

- [Audrey for Dummies](docs/audrey-for-dummies.md)
- [MCP host guide](docs/mcp-hosts.md)
- [Ollama and local agents](docs/ollama-local-agents.md)
- [Production readiness](docs/production-readiness.md)
- [Future of LLM memory](docs/future-of-llm-memory.md)
- [Benchmarking](docs/benchmarking.md)
- [Security policy](SECURITY.md)
- Public setup, runtime, benchmark, and command guidance is maintained in this README.

## Development

1 change: 0 additions & 1 deletion SECURITY.md
@@ -16,7 +16,6 @@ Do not open a public GitHub issue for a security vulnerability.
Report vulnerabilities through one of these channels:

- GitHub Security Advisories for this repository
- email: `j.tyler.eveland@gmail.com`

Include:

141 changes: 141 additions & 0 deletions benchmarks/perf.bench.js
@@ -0,0 +1,141 @@
import { performance } from 'node:perf_hooks';
import { mkdtempSync, rmSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { pathToFileURL } from 'node:url';
import { Audrey } from '../dist/src/index.js';

const RUNS = 20;

// Budget source: CHANGELOG.md#0220---2026-04-28, from the Audrey/MemoryGym
// latency pass. This mock-provider gate catches mechanical regressions in
// Audrey CI before live GPU benchmarks or MemoryGym release gates find them.
export const PERF_BUDGETS = Object.freeze({
  encodeResponseP95Ms: 50,
  hybridRecallP95Ms: 25,
  queueProcessingP50Ms: 5,
});

function roundMs(value) {
  return Math.round(value * 1000) / 1000;
}

function percentile(values, percentileRank) {
  if (values.length === 0) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  const index = Math.min(sorted.length - 1, Math.ceil((percentileRank / 100) * sorted.length) - 1);
  return sorted[index];
}

function stats(values) {
  if (values.length === 0) {
    return { p50: 0, p95: 0, min: 0, max: 0 };
  }
  return {
    p50: roundMs(percentile(values, 50)),
    p95: roundMs(percentile(values, 95)),
    min: roundMs(Math.min(...values)),
    max: roundMs(Math.max(...values)),
  };
}

function assertBudget(name, actual, budget) {
  if (actual >= budget) {
    throw new Error(`${name} ${actual}ms exceeded budget ${budget}ms`);
  }
}

function seedContent(index) {
  const cases = [
    'Stripe API returned HTTP 429 during checkout retry and needs exponential backoff.',
    'Project memory routing should prefer Audrey MCP for durable agent context.',
    'Tool trace learning marks repeated npm spawn EPERM failures as risky on Windows shells.',
    'Calendar authority should come from the official source before inferred user notes.',
    'Vector recall is faster but loses BM25 lexical signal on exact identifiers.',
  ];
  return `${cases[index % cases.length]} Perf sample ${index}.`;
}

export async function runPerfBenchmark({
  runs = RUNS,
  budgets = PERF_BUDGETS,
  out = console.log,
} = {}) {
  const dataDir = mkdtempSync(join(tmpdir(), 'audrey-perf-'));
  const audrey = new Audrey({
    dataDir,
    agent: 'perf-bench',
    embedding: { provider: 'mock', dimensions: 64 },
    llm: { provider: 'mock' },
  });

  const queueProcessingTimes = [];
  audrey.on('post-encode-complete', event => {
    queueProcessingTimes.push(event.processing_ms);
  });

  try {
    const encodeTimes = [];
    for (let i = 0; i < runs; i += 1) {
      const startedAt = performance.now();
      await audrey.encode({
        content: seedContent(i),
        source: 'direct-observation',
        tags: ['perf-gate'],
        affect: { valence: i % 2 === 0 ? 0.3 : -0.1, arousal: 0.2 },
      });
      encodeTimes.push(performance.now() - startedAt);
    }

    const drain = await audrey.drainPostEncodeQueue(5000);
    if (!drain.drained) {
      throw new Error(`post-encode queue did not drain: ${drain.pendingIds.join(', ')}`);
    }

    const recallTimes = [];
    for (let i = 0; i < runs; i += 1) {
      const startedAt = performance.now();
      await audrey.recall('Stripe API 429 retry memory routing', {
        limit: 5,
        retrieval: 'hybrid',
      });
      recallTimes.push(performance.now() - startedAt);
    }

    const status = audrey.memoryStatus();
    const result = {
      runs,
      budgets,
      encode_response_ms: stats(encodeTimes),
      hybrid_recall_ms: stats(recallTimes),
      queue_processing_ms: stats(queueProcessingTimes),
      queue_events: queueProcessingTimes.length,
      status: {
        pending_consolidation_count: status.pending_consolidation_count,
        default_retrieval_mode: status.default_retrieval_mode,
      },
    };

    if (queueProcessingTimes.length !== runs) {
      throw new Error(`expected ${runs} post-encode queue events, got ${queueProcessingTimes.length}`);
    }

    assertBudget('encode response p95', result.encode_response_ms.p95, budgets.encodeResponseP95Ms);
    assertBudget('hybrid recall p95', result.hybrid_recall_ms.p95, budgets.hybridRecallP95Ms);
    assertBudget('queue processing p50', result.queue_processing_ms.p50, budgets.queueProcessingP50Ms);

    out(`Audrey perf gate passed: encode p95=${result.encode_response_ms.p95}ms, `
      + `hybrid recall p95=${result.hybrid_recall_ms.p95}ms, `
      + `queue p50=${result.queue_processing_ms.p50}ms`);
    return result;
  } finally {
    audrey.close();
    rmSync(dataDir, { recursive: true, force: true });
  }
}

if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  runPerfBenchmark().catch(err => {
    console.error('[audrey] perf gate failed:', err);
    process.exit(1);
  });
}