Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions src/memory/observations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import {
generateEmbedding,
batchGenerateEmbeddings,
getVectorDimensions,
hydrateIndex,
isEmbeddingEnabled,
makeOramaObservationId,
} from '../store/orama-store.js';
import { saveObservationsJson, loadObservationsJson, saveIdCounter, loadIdCounter } from '../store/persistence.js';
Expand Down Expand Up @@ -562,8 +564,8 @@ export function suggestTopicKey(type: string, title: string): string {
}

/**
* Reload observations into the Orama index.
* Called during server startup to restore the search index.
* Reload observations into the Orama index with full corpus embeddings.
* Intended for explicit heavy rebuilds, not normal MCP startup.
*
* Optimization: uses batch embedding (ONNX processes 64 texts at a time)
* instead of individual embed calls. This reduces startup CPU from minutes
Expand Down Expand Up @@ -641,6 +643,30 @@ export async function reindexObservations(): Promise<number> {
return count;
}

/**
 * Prepare the search index for startup and hot-reload without blocking on
 * corpus-wide embedding generation.
 *
 * This hydrates the lexical/BM25 index immediately so MCP availability is not
 * coupled to embedding provider throughput. Missing vectors are queued for the
 * existing background backfill cycle.
 *
 * @returns the number of observations inserted into the lexical index
 */
export async function prepareSearchIndex(): Promise<number> {
  // Drop any stale in-memory index before re-hydrating from the JSON store.
  await resetDb();
  // hydrateIndex takes any[]; the previous `as unknown as any[]` double
  // assertion was unnecessary and only hid type drift.
  const count = await hydrateIndex(observations);

  // Rebuild the vector backfill queue from scratch so the background cycle
  // re-embeds exactly the docs that are live. Only active observations are
  // queued; resolved/archived docs stay lexical-only.
  vectorMissingIds.clear();
  if (isEmbeddingEnabled()) {
    for (const obs of observations) {
      if ((obs.status ?? 'active') === 'active') {
        vectorMissingIds.add(obs.id);
      }
    }
  }

  return count;
}

// ── Vector-missing observability & backfill ─────────────────────────

/**
Expand Down
14 changes: 6 additions & 8 deletions src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ import { watchFile } from 'node:fs';
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import { KnowledgeGraphManager } from './memory/graph.js';
import { initObservations, storeObservation, reindexObservations, migrateProjectIds, getObservation } from './memory/observations.js';
import { resetDb } from './store/orama-store.js';
import { initObservations, storeObservation, prepareSearchIndex, migrateProjectIds, getObservation } from './memory/observations.js';
import { createAutoRelations } from './memory/auto-relations.js';
import { extractEntities } from './memory/entity-extractor.js';
import { compactSearch, compactTimeline, compactDetail } from './compact/engine.js';
Expand Down Expand Up @@ -264,9 +263,9 @@ export async function createMemorixServer(
await graphManager.init();
await initObservations(projectDir);

const reindexed = await reindexObservations();
if (reindexed > 0) {
console.error(`[memorix] Reindexed ${reindexed} observations for project: ${project.id}`);
const indexed = await prepareSearchIndex();
if (indexed > 0) {
console.error(`[memorix] Prepared search index for ${indexed} observations in project: ${project.id}`);
}

const llmConfig = initLLM();
Expand Down Expand Up @@ -3286,11 +3285,10 @@ export async function createMemorixServer(
if (reloading) return;
reloading = true;
try {
await resetDb();
await initObservations(projectDir);
const count = await reindexObservations();
const count = await prepareSearchIndex();
if (count > 0) {
console.error(`[memorix] Hot-reloaded ${count} observations (external write detected)`);
console.error(`[memorix] Hot-reloaded search index for ${count} observations (external write detected)`);
}
} catch { /* silent */ }
reloading = false;
Expand Down
1 change: 0 additions & 1 deletion src/store/orama-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,6 @@ export async function hydrateIndex(observations: any[]): Promise<number> {
let inserted = 0;
for (const obs of observations) {
if (!obs || !obs.id || !obs.projectId) continue;
if ((obs.status ?? 'active') !== 'active') continue;
try {
const doc: MemorixDocument = {
id: makeOramaObservationId(obs.projectId, obs.id),
Expand Down
139 changes: 139 additions & 0 deletions tests/memory/prepare-search-index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';

// Shared mock handles. vitest hoists the vi.mock factories below above these
// declarations; factories may still reference them because vitest permits
// lazily-resolved outer variables whose names start with "mock".
const mockResetDb = vi.fn();
const mockBatchGenerateEmbeddings = vi.fn();
const mockHydrateIndex = vi.fn();
const mockInsertObservation = vi.fn();
const mockLoadObservationsJson = vi.fn();
const mockLoadIdCounter = vi.fn();
const mockIsEmbeddingEnabled = vi.fn();

// Replace the Orama store so no real index or embedding provider is touched.
vi.mock('../../src/store/orama-store.js', () => ({
  insertObservation: mockInsertObservation,
  removeObservation: vi.fn(),
  resetDb: mockResetDb,
  generateEmbedding: vi.fn(),
  batchGenerateEmbeddings: mockBatchGenerateEmbeddings,
  hydrateIndex: mockHydrateIndex,
  isEmbeddingEnabled: mockIsEmbeddingEnabled,
  // Mirrors the real id format ("<projectId>:<observationId>") so any
  // id-based assertions stay readable.
  makeOramaObservationId: (projectId: string, observationId: number) => `${projectId}:${observationId}`,
}));

// Stub persistence so each test controls exactly what "disk" returns.
vi.mock('../../src/store/persistence.js', () => ({
  saveObservationsJson: vi.fn(),
  loadObservationsJson: mockLoadObservationsJson,
  saveIdCounter: vi.fn(),
  loadIdCounter: mockLoadIdCounter,
}));

// File locking becomes a pass-through: run the callback immediately.
vi.mock('../../src/store/file-lock.js', () => ({
  withFileLock: async (_dir: string, fn: () => Promise<unknown>) => fn(),
}));

// Token counting is irrelevant to index preparation; return a constant.
vi.mock('../../src/compact/token-budget.js', () => ({
  countTextTokens: () => 0,
}));

// Entity extraction is a no-op so observation ingestion stays deterministic.
vi.mock('../../src/memory/entity-extractor.js', () => ({
  extractEntities: () => [],
  enrichConcepts: (concepts: string[]) => concepts,
}));

describe('prepareSearchIndex', () => {
  beforeEach(() => {
    // resetModules forces the module under test to be re-evaluated per test,
    // so its module-level observation state does not leak between cases.
    vi.resetModules();
    mockResetDb.mockReset();
    mockBatchGenerateEmbeddings.mockReset();
    mockHydrateIndex.mockReset();
    mockInsertObservation.mockReset();
    mockLoadObservationsJson.mockReset();
    mockLoadIdCounter.mockReset();
    mockIsEmbeddingEnabled.mockReset();
  });

  it('hydrates the lexical index without triggering batch embeddings and queues active docs for backfill', async () => {
    // Two observations: one active (should be queued for vector backfill)
    // and one resolved (indexed lexically but NOT queued).
    mockLoadObservationsJson.mockResolvedValue([
      {
        id: 1,
        projectId: 'AVIDS2/memorix',
        entityName: 'search-layer',
        type: 'what-changed',
        title: 'Prepared startup index',
        narrative: 'Build lexical index first, defer vectors.',
        facts: ['Startup should not block on embeddings'],
        filesModified: ['src/server.ts'],
        concepts: ['startup-index'],
        tokens: 42,
        createdAt: '2026-03-18T00:00:00.000Z',
        status: 'active',
        source: 'agent',
      },
      {
        id: 2,
        projectId: 'AVIDS2/memorix',
        entityName: 'history',
        type: 'decision',
        title: 'Resolved old note',
        narrative: 'Should stay out of the backfill queue.',
        facts: [],
        filesModified: [],
        concepts: ['resolved'],
        tokens: 12,
        createdAt: '2026-03-18T00:00:01.000Z',
        status: 'resolved',
        source: 'agent',
      },
    ]);
    mockLoadIdCounter.mockResolvedValue(3);
    mockHydrateIndex.mockResolvedValue(2);
    mockIsEmbeddingEnabled.mockReturnValue(true);

    // Dynamic import AFTER the mocks/resetModules so the fresh module
    // instance picks up the mocked dependencies.
    const { initObservations, prepareSearchIndex, getVectorMissingIds } = await import('../../src/memory/observations.js');

    await initObservations('E:/tmp/project');
    const count = await prepareSearchIndex();

    expect(count).toBe(2);
    expect(mockResetDb).toHaveBeenCalledOnce();
    expect(mockHydrateIndex).toHaveBeenCalledOnce();
    // Both docs (active AND resolved) reach the lexical index.
    expect(mockHydrateIndex).toHaveBeenCalledWith(
      expect.arrayContaining([
        expect.objectContaining({ id: 1, title: 'Prepared startup index' }),
        expect.objectContaining({ id: 2, title: 'Resolved old note' }),
      ]),
    );
    // The core contract: startup path never embeds the whole corpus.
    expect(mockBatchGenerateEmbeddings).not.toHaveBeenCalled();
    // Only the active observation is queued for background backfill.
    expect(getVectorMissingIds()).toEqual([1]);
  });

  it('leaves the backfill queue empty when vector search is not enabled', async () => {
    mockLoadObservationsJson.mockResolvedValue([
      {
        id: 7,
        projectId: 'AVIDS2/memorix',
        entityName: 'fallback',
        type: 'discovery',
        title: 'Fulltext only startup',
        narrative: 'Embedding provider disabled.',
        facts: [],
        filesModified: [],
        concepts: ['bm25'],
        tokens: 9,
        createdAt: '2026-03-18T00:00:00.000Z',
        status: 'active',
        source: 'agent',
      },
    ]);
    mockLoadIdCounter.mockResolvedValue(8);
    mockHydrateIndex.mockResolvedValue(1);
    mockIsEmbeddingEnabled.mockReturnValue(false);

    const { initObservations, prepareSearchIndex, getVectorMissingIds } = await import('../../src/memory/observations.js');

    await initObservations('E:/tmp/project');
    await prepareSearchIndex();

    // With embeddings disabled nothing is embedded and nothing is queued:
    // the index runs BM25-only.
    expect(mockBatchGenerateEmbeddings).not.toHaveBeenCalled();
    expect(getVectorMissingIds()).toEqual([]);
  });
});
94 changes: 94 additions & 0 deletions tests/store/hydrate-index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import { describe, it, expect, beforeEach } from 'vitest';
import { resetDb, hydrateIndex, makeOramaObservationId } from '../../src/store/orama-store.js';
import { count, search } from '@orama/orama';

// Minimal observation shape matching what hydrateIndex expects
// Build a minimal observation record in the shape hydrateIndex consumes.
// Only id/status/title vary per test; everything else is fixed filler.
function makeObs(id: number, status: string, title: string) {
  const projectId = 'test/hydrate-project';
  const entityName = 'entity-' + String(id);
  const narrative = 'Narrative for observation ' + String(id);

  return {
    id,
    projectId,
    entityName,
    type: 'discovery',
    title,
    narrative,
    facts: ['fact-a'],
    filesModified: [],
    concepts: ['test'],
    tokens: 100,
    createdAt: new Date().toISOString(),
    accessCount: 0,
    lastAccessedAt: '',
    status,
    source: 'agent',
  };
}

describe('hydrateIndex – status handling', () => {
  beforeEach(async () => {
    // Fresh index per test; hydrateIndex behavior depends on prior state
    // (see the idempotency test below).
    await resetDb();
  });

  it('indexes active, resolved, AND archived observations', async () => {
    // Regression guard: hydrateIndex previously skipped non-active docs.
    const observations = [
      makeObs(1, 'active', 'Active observation'),
      makeObs(2, 'resolved', 'Resolved observation'),
      makeObs(3, 'archived', 'Archived observation'),
    ];

    const inserted = await hydrateIndex(observations);
    expect(inserted).toBe(3);
  });

  it('stores the status field faithfully in the index', async () => {
    const observations = [
      makeObs(10, 'active', 'Status active entry'),
      makeObs(11, 'resolved', 'Status resolved entry'),
      makeObs(12, 'archived', 'Status archived entry'),
    ];

    await hydrateIndex(observations);

    // Import getDb dynamically to access the raw database for verification
    const { getDb } = await import('../../src/store/orama-store.js');
    const db = await getDb();

    // Search for each status value to confirm they're indexed
    const activeHits = await search(db, { term: 'Status active entry', properties: ['title'] });
    const resolvedHits = await search(db, { term: 'Status resolved entry', properties: ['title'] });
    const archivedHits = await search(db, { term: 'Status archived entry', properties: ['title'] });

    // >= 1 (not === 1) because BM25 term search may match sibling docs that
    // share tokens like "Status" and "entry".
    expect(activeHits.count).toBeGreaterThanOrEqual(1);
    expect(resolvedHits.count).toBeGreaterThanOrEqual(1);
    expect(archivedHits.count).toBeGreaterThanOrEqual(1);
  });

  it('skips malformed observations without crashing', async () => {
    // Entries missing id or projectId must be silently skipped, matching the
    // guard at the top of hydrateIndex's insert loop.
    const observations = [
      makeObs(20, 'active', 'Good observation'),
      null,
      { id: null, projectId: 'x' },
      { id: 21 }, // missing projectId
      makeObs(22, 'resolved', 'Another good one'),
    ];

    const inserted = await hydrateIndex(observations as any[]);
    expect(inserted).toBe(2);
  });

  it('is idempotent – second call is a no-op', async () => {
    // NOTE(review): assumes hydrateIndex tracks that it already ran and
    // returns 0 on repeat calls — the guard is not visible in this diff;
    // confirm it exists in orama-store.
    const observations = [
      makeObs(30, 'active', 'First hydration'),
      makeObs(31, 'resolved', 'First hydration resolved'),
    ];

    const first = await hydrateIndex(observations);
    expect(first).toBe(2);

    // Second call with more observations should return 0 (already hydrated)
    const second = await hydrateIndex([
      ...observations,
      makeObs(32, 'archived', 'Late arrival'),
    ]);
    expect(second).toBe(0);
  });
});
Loading