Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions src/memory/observations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import {
generateEmbedding,
batchGenerateEmbeddings,
getVectorDimensions,
hydrateIndex,
isEmbeddingEnabled,
makeOramaObservationId,
} from '../store/orama-store.js';
import { saveObservationsJson, loadObservationsJson, saveIdCounter, loadIdCounter } from '../store/persistence.js';
Expand Down Expand Up @@ -562,8 +564,8 @@ export function suggestTopicKey(type: string, title: string): string {
}

/**
* Reload observations into the Orama index.
* Called during server startup to restore the search index.
* Reload observations into the Orama index with full corpus embeddings.
* Intended for explicit heavy rebuilds, not normal MCP startup.
*
* Optimization: uses batch embedding (ONNX processes 64 texts at a time)
* instead of individual embed calls. This reduces startup CPU from minutes
Expand Down Expand Up @@ -641,6 +643,30 @@ export async function reindexObservations(): Promise<number> {
return count;
}

/**
 * Prepare the search index for startup and hot-reload without blocking on
 * corpus-wide embedding generation.
 *
 * This hydrates the lexical/BM25 index immediately so MCP availability is not
 * coupled to embedding provider throughput. Missing vectors are queued for the
 * existing background backfill cycle.
 *
 * @returns the number of observations inserted into the lexical index
 */
export async function prepareSearchIndex(): Promise<number> {
  // Drop any stale in-memory index before re-hydrating from the JSON store.
  await resetDb();
  // hydrateIndex takes any[]; the previous `as unknown as any[]` double
  // assertion was unnecessary and only hid type drift.
  const count = await hydrateIndex(observations);

  // Rebuild the vector backfill queue from scratch so the background cycle
  // re-embeds exactly the docs that are live. Only active observations are
  // queued; resolved/archived docs stay lexical-only.
  vectorMissingIds.clear();
  if (isEmbeddingEnabled()) {
    for (const obs of observations) {
      if ((obs.status ?? 'active') === 'active') {
        vectorMissingIds.add(obs.id);
      }
    }
  }

  return count;
}

// ── Vector-missing observability & backfill ─────────────────────────

/**
Expand Down
14 changes: 6 additions & 8 deletions src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ import { watchFile } from 'node:fs';
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import { KnowledgeGraphManager } from './memory/graph.js';
import { initObservations, storeObservation, reindexObservations, migrateProjectIds, getObservation } from './memory/observations.js';
import { resetDb } from './store/orama-store.js';
import { initObservations, storeObservation, prepareSearchIndex, migrateProjectIds, getObservation } from './memory/observations.js';
import { createAutoRelations } from './memory/auto-relations.js';
import { extractEntities } from './memory/entity-extractor.js';
import { compactSearch, compactTimeline, compactDetail } from './compact/engine.js';
Expand Down Expand Up @@ -264,9 +263,9 @@ export async function createMemorixServer(
await graphManager.init();
await initObservations(projectDir);

const reindexed = await reindexObservations();
if (reindexed > 0) {
console.error(`[memorix] Reindexed ${reindexed} observations for project: ${project.id}`);
const indexed = await prepareSearchIndex();
if (indexed > 0) {
console.error(`[memorix] Prepared search index for ${indexed} observations in project: ${project.id}`);
}

const llmConfig = initLLM();
Expand Down Expand Up @@ -3286,11 +3285,10 @@ export async function createMemorixServer(
if (reloading) return;
reloading = true;
try {
await resetDb();
await initObservations(projectDir);
const count = await reindexObservations();
const count = await prepareSearchIndex();
if (count > 0) {
console.error(`[memorix] Hot-reloaded ${count} observations (external write detected)`);
console.error(`[memorix] Hot-reloaded search index for ${count} observations (external write detected)`);
}
} catch { /* silent */ }
reloading = false;
Expand Down
1 change: 0 additions & 1 deletion src/store/orama-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,6 @@ export async function hydrateIndex(observations: any[]): Promise<number> {
let inserted = 0;
for (const obs of observations) {
if (!obs || !obs.id || !obs.projectId) continue;
if ((obs.status ?? 'active') !== 'active') continue;
try {
const doc: MemorixDocument = {
id: makeOramaObservationId(obs.projectId, obs.id),
Expand Down
139 changes: 139 additions & 0 deletions tests/memory/prepare-search-index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';

// Shared mock handles. vitest hoists the vi.mock factories below above these
// declarations; factories may still reference them because vitest permits
// lazily-resolved outer variables whose names start with "mock".
const mockResetDb = vi.fn();
const mockBatchGenerateEmbeddings = vi.fn();
const mockHydrateIndex = vi.fn();
const mockInsertObservation = vi.fn();
const mockLoadObservationsJson = vi.fn();
const mockLoadIdCounter = vi.fn();
const mockIsEmbeddingEnabled = vi.fn();

// Replace the Orama store so no real index or embedding provider is touched.
vi.mock('../../src/store/orama-store.js', () => ({
  insertObservation: mockInsertObservation,
  removeObservation: vi.fn(),
  resetDb: mockResetDb,
  generateEmbedding: vi.fn(),
  batchGenerateEmbeddings: mockBatchGenerateEmbeddings,
  hydrateIndex: mockHydrateIndex,
  isEmbeddingEnabled: mockIsEmbeddingEnabled,
  // Mirrors the real id format ("<projectId>:<observationId>") so any
  // id-based assertions stay readable.
  makeOramaObservationId: (projectId: string, observationId: number) => `${projectId}:${observationId}`,
}));

// Stub persistence so each test controls exactly what "disk" returns.
vi.mock('../../src/store/persistence.js', () => ({
  saveObservationsJson: vi.fn(),
  loadObservationsJson: mockLoadObservationsJson,
  saveIdCounter: vi.fn(),
  loadIdCounter: mockLoadIdCounter,
}));

// File locking becomes a pass-through: run the callback immediately.
vi.mock('../../src/store/file-lock.js', () => ({
  withFileLock: async (_dir: string, fn: () => Promise<unknown>) => fn(),
}));

// Token counting is irrelevant to index preparation; return a constant.
vi.mock('../../src/compact/token-budget.js', () => ({
  countTextTokens: () => 0,
}));

// Entity extraction is a no-op so observation ingestion stays deterministic.
vi.mock('../../src/memory/entity-extractor.js', () => ({
  extractEntities: () => [],
  enrichConcepts: (concepts: string[]) => concepts,
}));

describe('prepareSearchIndex', () => {
  beforeEach(() => {
    // resetModules forces the module under test to be re-evaluated per test,
    // so its module-level observation state does not leak between cases.
    vi.resetModules();
    mockResetDb.mockReset();
    mockBatchGenerateEmbeddings.mockReset();
    mockHydrateIndex.mockReset();
    mockInsertObservation.mockReset();
    mockLoadObservationsJson.mockReset();
    mockLoadIdCounter.mockReset();
    mockIsEmbeddingEnabled.mockReset();
  });

  it('hydrates the lexical index without triggering batch embeddings and queues active docs for backfill', async () => {
    // Two observations: one active (should be queued for vector backfill)
    // and one resolved (indexed lexically but NOT queued).
    mockLoadObservationsJson.mockResolvedValue([
      {
        id: 1,
        projectId: 'AVIDS2/memorix',
        entityName: 'search-layer',
        type: 'what-changed',
        title: 'Prepared startup index',
        narrative: 'Build lexical index first, defer vectors.',
        facts: ['Startup should not block on embeddings'],
        filesModified: ['src/server.ts'],
        concepts: ['startup-index'],
        tokens: 42,
        createdAt: '2026-03-18T00:00:00.000Z',
        status: 'active',
        source: 'agent',
      },
      {
        id: 2,
        projectId: 'AVIDS2/memorix',
        entityName: 'history',
        type: 'decision',
        title: 'Resolved old note',
        narrative: 'Should stay out of the backfill queue.',
        facts: [],
        filesModified: [],
        concepts: ['resolved'],
        tokens: 12,
        createdAt: '2026-03-18T00:00:01.000Z',
        status: 'resolved',
        source: 'agent',
      },
    ]);
    mockLoadIdCounter.mockResolvedValue(3);
    mockHydrateIndex.mockResolvedValue(2);
    mockIsEmbeddingEnabled.mockReturnValue(true);

    // Dynamic import AFTER the mocks/resetModules so the fresh module
    // instance picks up the mocked dependencies.
    const { initObservations, prepareSearchIndex, getVectorMissingIds } = await import('../../src/memory/observations.js');

    await initObservations('E:/tmp/project');
    const count = await prepareSearchIndex();

    expect(count).toBe(2);
    expect(mockResetDb).toHaveBeenCalledOnce();
    expect(mockHydrateIndex).toHaveBeenCalledOnce();
    // Both docs (active AND resolved) reach the lexical index.
    expect(mockHydrateIndex).toHaveBeenCalledWith(
      expect.arrayContaining([
        expect.objectContaining({ id: 1, title: 'Prepared startup index' }),
        expect.objectContaining({ id: 2, title: 'Resolved old note' }),
      ]),
    );
    // The core contract: startup path never embeds the whole corpus.
    expect(mockBatchGenerateEmbeddings).not.toHaveBeenCalled();
    // Only the active observation is queued for background backfill.
    expect(getVectorMissingIds()).toEqual([1]);
  });

  it('leaves the backfill queue empty when vector search is not enabled', async () => {
    mockLoadObservationsJson.mockResolvedValue([
      {
        id: 7,
        projectId: 'AVIDS2/memorix',
        entityName: 'fallback',
        type: 'discovery',
        title: 'Fulltext only startup',
        narrative: 'Embedding provider disabled.',
        facts: [],
        filesModified: [],
        concepts: ['bm25'],
        tokens: 9,
        createdAt: '2026-03-18T00:00:00.000Z',
        status: 'active',
        source: 'agent',
      },
    ]);
    mockLoadIdCounter.mockResolvedValue(8);
    mockHydrateIndex.mockResolvedValue(1);
    mockIsEmbeddingEnabled.mockReturnValue(false);

    const { initObservations, prepareSearchIndex, getVectorMissingIds } = await import('../../src/memory/observations.js');

    await initObservations('E:/tmp/project');
    await prepareSearchIndex();

    // With embeddings disabled nothing is embedded and nothing is queued:
    // the index runs BM25-only.
    expect(mockBatchGenerateEmbeddings).not.toHaveBeenCalled();
    expect(getVectorMissingIds()).toEqual([]);
  });
});
94 changes: 94 additions & 0 deletions tests/store/hydrate-index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import { describe, it, expect, beforeEach } from 'vitest';
import { resetDb, hydrateIndex, makeOramaObservationId } from '../../src/store/orama-store.js';
import { count, search } from '@orama/orama';

// Minimal observation shape matching what hydrateIndex expects
// Build a minimal observation record in the shape hydrateIndex consumes.
// Only id/status/title vary per test; everything else is fixed filler.
function makeObs(id: number, status: string, title: string) {
  const projectId = 'test/hydrate-project';
  const entityName = 'entity-' + String(id);
  const narrative = 'Narrative for observation ' + String(id);

  return {
    id,
    projectId,
    entityName,
    type: 'discovery',
    title,
    narrative,
    facts: ['fact-a'],
    filesModified: [],
    concepts: ['test'],
    tokens: 100,
    createdAt: new Date().toISOString(),
    accessCount: 0,
    lastAccessedAt: '',
    status,
    source: 'agent',
  };
}

describe('hydrateIndex – status handling', () => {
  beforeEach(async () => {
    // Fresh index per test; hydrateIndex behavior depends on prior state
    // (see the idempotency test below).
    await resetDb();
  });

  it('indexes active, resolved, AND archived observations', async () => {
    // Regression guard: hydrateIndex previously skipped non-active docs.
    const observations = [
      makeObs(1, 'active', 'Active observation'),
      makeObs(2, 'resolved', 'Resolved observation'),
      makeObs(3, 'archived', 'Archived observation'),
    ];

    const inserted = await hydrateIndex(observations);
    expect(inserted).toBe(3);
  });

  it('stores the status field faithfully in the index', async () => {
    const observations = [
      makeObs(10, 'active', 'Status active entry'),
      makeObs(11, 'resolved', 'Status resolved entry'),
      makeObs(12, 'archived', 'Status archived entry'),
    ];

    await hydrateIndex(observations);

    // Import getDb dynamically to access the raw database for verification
    const { getDb } = await import('../../src/store/orama-store.js');
    const db = await getDb();

    // Search for each status value to confirm they're indexed
    const activeHits = await search(db, { term: 'Status active entry', properties: ['title'] });
    const resolvedHits = await search(db, { term: 'Status resolved entry', properties: ['title'] });
    const archivedHits = await search(db, { term: 'Status archived entry', properties: ['title'] });

    // >= 1 (not === 1) because BM25 term search may match sibling docs that
    // share tokens like "Status" and "entry".
    expect(activeHits.count).toBeGreaterThanOrEqual(1);
    expect(resolvedHits.count).toBeGreaterThanOrEqual(1);
    expect(archivedHits.count).toBeGreaterThanOrEqual(1);
  });

  it('skips malformed observations without crashing', async () => {
    // Entries missing id or projectId must be silently skipped, matching the
    // guard at the top of hydrateIndex's insert loop.
    const observations = [
      makeObs(20, 'active', 'Good observation'),
      null,
      { id: null, projectId: 'x' },
      { id: 21 }, // missing projectId
      makeObs(22, 'resolved', 'Another good one'),
    ];

    const inserted = await hydrateIndex(observations as any[]);
    expect(inserted).toBe(2);
  });

  it('is idempotent – second call is a no-op', async () => {
    // NOTE(review): assumes hydrateIndex tracks that it already ran and
    // returns 0 on repeat calls — the guard is not visible in this diff;
    // confirm it exists in orama-store.
    const observations = [
      makeObs(30, 'active', 'First hydration'),
      makeObs(31, 'resolved', 'First hydration resolved'),
    ];

    const first = await hydrateIndex(observations);
    expect(first).toBe(2);

    // Second call with more observations should return 0 (already hydrated)
    const second = await hydrateIndex([
      ...observations,
      makeObs(32, 'archived', 'Late arrival'),
    ]);
    expect(second).toBe(0);
  });
});
Loading