Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

All notable changes to this project will be documented in this file.

## [Unreleased]

### Fixed
- **API-first auto embedding selection** — `MEMORIX_EMBEDDING=auto` now prefers a configured remote embedding API before falling back to local `fastembed` or `transformers`, preventing unexpected local-model activation when API credentials are already present.
- **Embedding cache isolation across config changes** — API embedding cache keys and probe-dimension metadata now stay isolated per `baseUrl + model + requestedDimensions`, so switching between shortened and native dimensions no longer reuses stale cached embeddings or stale dimension probes.

## [1.0.5] - 2026-03-24

### Added
Expand Down
12 changes: 12 additions & 0 deletions docs/CONFIGURATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,18 @@ Common values:
- `transformers`
- `auto`

`auto` now prefers a configured remote embedding API first.

- If `MEMORIX_EMBEDDING_API_KEY` or another supported API key is present, Memorix uses the remote `/v1/embeddings` provider first.
- Only if API embedding is unavailable does it fall back to local `fastembed`, then `transformers`.
- This keeps semantic search on the API path by default while preserving local fallback behavior.

When using API embeddings with optional dimension shortening:

- `MEMORIX_EMBEDDING_DIMENSIONS` is treated as part of the embedding configuration identity.
- Memorix keeps API embedding cache entries and probed dimension metadata isolated per `baseUrl + model + requestedDimensions`.
- Changing from shortened dimensions back to native dimensions no longer reuses stale cached vectors or stale probe results.

### `git`

Controls Git-Memory behavior.
Expand Down
120 changes: 109 additions & 11 deletions src/embedding/api-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,17 @@ function normalizeText(text: string): string {
return text.replace(/\s+/g, ' ').trim().slice(0, MAX_INPUT_CHARS);
}

function textHash(text: string): string {
return createHash('sha256').update(text).digest('hex').slice(0, 16);
/**
 * Builds the cache-key namespace for an embedding configuration.
 * Embeddings are only interchangeable within the same endpoint + model +
 * requested-dimension combination, so all three (plus a format-version tag)
 * are folded into every cache key.
 */
function cacheNamespace(config: Pick<APIEmbeddingConfig, 'baseUrl' | 'model' | 'requestedDimensions'>): string {
  // Trailing slashes on the base URL are cosmetic; strip them so
  // "https://host" and "https://host/" share one namespace.
  const endpoint = config.baseUrl.replace(/\/+$/, '');
  const dims = config.requestedDimensions ?? 'native';
  return `v2|${endpoint}|${config.model}|${dims}`;
}

/**
 * Content-addressed cache key: sha256 over namespace + NUL + text, truncated
 * to 16 hex chars. The NUL separator keeps (namespace, text) pairs from
 * colliding when their concatenations would otherwise be identical.
 */
function textHash(text: string, namespace: string): string {
  const hasher = createHash('sha256');
  hasher.update(namespace);
  hasher.update('\u0000');
  hasher.update(text);
  return hasher.digest('hex').slice(0, 16);
}

async function loadDiskCache(): Promise<void> {
Expand Down Expand Up @@ -65,23 +74,110 @@ async function ensureDiskCacheLoaded(): Promise<void> {
await loadDiskCache();
}

/** Identity key for a probe-dimension cache entry (endpoint + model + requested dims). */
function dimsCacheKey(config: Pick<APIEmbeddingConfig, 'baseUrl' | 'model' | 'requestedDimensions'>): string {
  const endpoint = config.baseUrl.replace(/\/+$/, '');
  const dims = config.requestedDimensions ?? 'native';
  return `${endpoint}|${config.model}|${dims}`;
}

/** Load cached probe dimensions from disk. Returns null if not cached. */
async function loadCachedDims(model: string, baseUrl: string): Promise<number | null> {
async function loadCachedDims(config: Pick<APIEmbeddingConfig, 'baseUrl' | 'model' | 'requestedDimensions'>): Promise<number | null> {
try {
const raw = await readFile(DIMS_CACHE_FILE, 'utf-8');
const data = JSON.parse(raw);
if (data.model === model && data.baseUrl === baseUrl && typeof data.dimensions === 'number') {

const key = dimsCacheKey(config);

if (Array.isArray(data.entries)) {
const entry = data.entries.find((candidate: unknown) =>
typeof candidate === 'object' &&
candidate !== null &&
'key' in candidate &&
'dimensions' in candidate &&
(candidate as { key?: string }).key === key &&
typeof (candidate as { dimensions?: unknown }).dimensions === 'number',
) as { dimensions: number } | undefined;
if (entry) return entry.dimensions;
}

if (
data.baseUrl === config.baseUrl &&
data.model === config.model &&
typeof data.dimensions === 'number' &&
(data.requestedDimensions ?? null) === (config.requestedDimensions ?? null)
) {
return data.dimensions;
}

if (
data.baseUrl === config.baseUrl &&
data.model === config.model &&
typeof data.dimensions === 'number' &&
(config.requestedDimensions ?? null) === null &&
!('requestedDimensions' in data)
) {
return data.dimensions;
}
} catch { /* no cache or corrupt */ }
return null;
}

/** Persist probe dimensions for fast subsequent starts. */
async function saveCachedDims(model: string, baseUrl: string, dimensions: number): Promise<void> {
/**
 * Persist probe dimensions for fast subsequent cold starts. Best-effort:
 * every failure is swallowed so a bad cache file can never break startup.
 *
 * The cache holds one entry per config identity key
 * (baseUrl + model + requestedDimensions), so probes for different
 * dimension settings never overwrite each other. A legacy single-entry
 * cache file is migrated into the multi-entry format on first save.
 */
async function saveCachedDims(config: Pick<APIEmbeddingConfig, 'baseUrl' | 'model' | 'requestedDimensions'>, dimensions: number): Promise<void> {
  try {
    await mkdir(CACHE_DIR, { recursive: true });
    const key = dimsCacheKey(config);
    let entries: Array<{ key: string; baseUrl: string; model: string; requestedDimensions: number | null; dimensions: number; ts: number }> = [];

    // Read existing entries so we merge with other configs, not clobber them.
    try {
      const raw = await readFile(DIMS_CACHE_FILE, 'utf-8');
      const data = JSON.parse(raw);
      if (Array.isArray(data.entries)) {
        // Current multi-entry format: keep only structurally valid entries.
        entries = data.entries.filter((entry: unknown) =>
          typeof entry === 'object' &&
          entry !== null &&
          'key' in entry &&
          typeof (entry as { key?: unknown }).key === 'string',
        ) as typeof entries;
      } else if (
        data &&
        typeof data === 'object' &&
        typeof data.baseUrl === 'string' &&
        typeof data.model === 'string' &&
        typeof data.dimensions === 'number'
      ) {
        // Legacy single-entry format: migrate it into the entries array so
        // the old probe result survives alongside the new one.
        entries = [{
          key: dimsCacheKey({
            baseUrl: data.baseUrl,
            model: data.model,
            requestedDimensions: data.requestedDimensions ?? null,
          }),
          baseUrl: data.baseUrl,
          model: data.model,
          requestedDimensions: data.requestedDimensions ?? null,
          dimensions: data.dimensions,
          ts: typeof data.ts === 'number' ? data.ts : Date.now(),
        }];
      }
    } catch {
      // no existing cache
    }

    const nextEntry = {
      key,
      baseUrl: config.baseUrl,
      model: config.model,
      requestedDimensions: config.requestedDimensions ?? null,
      dimensions,
      ts: Date.now(),
    };

    // Replace any previous entry for this identity key, then append.
    entries = entries.filter((entry) => entry.key !== key);
    entries.push(nextEntry);

    await writeFile(DIMS_CACHE_FILE, JSON.stringify({ entries }));
  } catch { /* best-effort */ }
}

Expand Down Expand Up @@ -161,11 +257,13 @@ export class APIEmbeddingProvider implements EmbeddingProvider {
readonly dimensions: number;

private config: APIEmbeddingConfig;
private readonly cacheKeyNamespace: string;
private totalTokensUsed = 0;
private totalApiCalls = 0;

  /**
   * Private: instances are constructed internally only after the embedding
   * dimensions have been detected (probed or loaded from the dims cache).
   */
  private constructor(config: APIEmbeddingConfig, detectedDimensions: number) {
    this.config = config;
    // Cache keys are namespaced per baseUrl + model + requestedDimensions so
    // a config change never reuses stale cached vectors.
    this.cacheKeyNamespace = cacheNamespace(config);
    this.dimensions = detectedDimensions;
    // Slashes in model ids (e.g. "org/model") would be awkward in a name.
    this.name = `api-${config.model.replace(/\//g, '-')}`;
  }
Expand All @@ -178,14 +276,14 @@ export class APIEmbeddingProvider implements EmbeddingProvider {
startDiskCacheLoad();

// Try cached dimensions first to avoid a network probe on cold start
let probeDimensions = await loadCachedDims(config.model, config.baseUrl);
let probeDimensions = await loadCachedDims(config);
if (probeDimensions !== null) {
console.error(`[memorix] API embedding: ${config.model} @ ${config.baseUrl} (${probeDimensions}d) [cached dims]`);
} else {
probeDimensions = await APIEmbeddingProvider.probeAPI(config);
console.error(`[memorix] API embedding: ${config.model} @ ${config.baseUrl} (${probeDimensions}d)`);
// Persist for next cold start
saveCachedDims(config.model, config.baseUrl, probeDimensions).catch(() => {});
saveCachedDims(config, probeDimensions).catch(() => {});
}
if (config.requestedDimensions) {
console.error(`[memorix] Dimension shortening: ${config.requestedDimensions}d requested`);
Expand Down Expand Up @@ -256,7 +354,7 @@ export class APIEmbeddingProvider implements EmbeddingProvider {

async embed(text: string): Promise<number[]> {
const normalized = normalizeText(text);
const hash = textHash(normalized);
const hash = textHash(normalized, this.cacheKeyNamespace);

// Fast path: cache already loaded (warm process) — instant lookup
if (diskCacheLoaded) {
Expand Down Expand Up @@ -331,7 +429,7 @@ export class APIEmbeddingProvider implements EmbeddingProvider {
const uncachedTexts: string[] = [];

for (let i = 0; i < normalizedTexts.length; i++) {
const hash = textHash(normalizedTexts[i]);
const hash = textHash(normalizedTexts[i], this.cacheKeyNamespace);
const cached = cache.get(hash);
if (cached) {
results[i] = cached;
Expand Down Expand Up @@ -371,7 +469,7 @@ export class APIEmbeddingProvider implements EmbeddingProvider {
for (const item of response.data) {
const originalIdx = chunkIndices[item.index];
results[originalIdx] = item.embedding;
cacheSet(textHash(normalizedTexts[originalIdx]), item.embedding);
cacheSet(textHash(normalizedTexts[originalIdx], this.cacheKeyNamespace), item.embedding);
}
} catch (error) {
const providerLimit = parseBatchLimit(error);
Expand Down
38 changes: 36 additions & 2 deletions src/embedding/provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
* - MEMORIX_EMBEDDING=fastembed → local ONNX inference (384-dim bge-small, ~300MB RAM)
* - MEMORIX_EMBEDDING=transformers → pure JS WASM inference (384-dim MiniLM, ~500MB RAM)
* - MEMORIX_EMBEDDING=api → remote API via OpenAI-compatible /v1/embeddings (zero local RAM)
* - MEMORIX_EMBEDDING=auto → try fastembed → transformers → off (legacy behavior)
* - MEMORIX_EMBEDDING=auto → try configured API → fastembed → transformers → off
*
* API mode env vars (MEMORIX_EMBEDDING=api):
* - MEMORIX_EMBEDDING_API_KEY → API key (fallback: MEMORIX_LLM_API_KEY → OPENAI_API_KEY)
Expand Down Expand Up @@ -66,6 +66,29 @@ function getEmbeddingMode(): 'off' | 'fastembed' | 'transformers' | 'api' | 'aut
}
}

/**
 * Best-effort check for a usable remote-embedding configuration, used by
 * auto mode to decide whether to try the API provider before local models.
 *
 * Prefers the central config module (so non-env config sources are honored);
 * if that lookup throws, falls back to scanning the supported API-key env vars.
 *
 * NOTE(review): `require()` only exists in a CommonJS build. The rest of this
 * file uses `await import()` / `export` — if it ships as ESM, this call always
 * throws and we silently take the env-var fallback, ignoring the config
 * module's baseUrl/model checks. Confirm the build target, or switch to
 * `createRequire` / a static import.
 */
function hasAPIEmbeddingConfig(): boolean {
  try {
    const {
      getEmbeddingApiKey,
      getEmbeddingBaseUrl,
      getEmbeddingModel,
    } = require('../config.js');

    // All three pieces must be present for the API provider to be viable.
    return Boolean(
      getEmbeddingApiKey?.() &&
      getEmbeddingBaseUrl?.() &&
      getEmbeddingModel?.(),
    );
  } catch {
    // Fallback: any supported key env var implies an API config may exist.
    return Boolean(
      process.env.MEMORIX_EMBEDDING_API_KEY ||
      process.env.MEMORIX_API_KEY ||
      process.env.MEMORIX_LLM_API_KEY ||
      process.env.OPENAI_API_KEY,
    );
  }
}

/** Minimum interval between retry attempts after a temporary failure (ms). */
const RETRY_COOLDOWN_MS = 30_000;
let lastFailureTimestamp = 0;
Expand Down Expand Up @@ -149,7 +172,18 @@ export async function getEmbeddingProvider(): Promise<EmbeddingProvider | null>
}
}

// Auto mode: try fastembed → transformers → off (legacy behavior)
// Auto mode: try configured API first, then local fallbacks
if (hasAPIEmbeddingConfig()) {
try {
const { APIEmbeddingProvider } = await import('./api-provider.js');
provider = await APIEmbeddingProvider.create();
console.error(`[memorix] Embedding provider: ${provider!.name} (${provider!.dimensions}d)`);
return provider;
} catch (e) {
console.error(`[memorix] API embedding unavailable in auto mode: ${e instanceof Error ? e.message : e}`);
}
}

try {
const { FastEmbedProvider } = await import('./fastembed-provider.js');
provider = await FastEmbedProvider.create();
Expand Down
53 changes: 51 additions & 2 deletions tests/embedding/api-provider.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
const mockFetch = vi.fn();
vi.stubGlobal('fetch', mockFetch);

const mockDiskFiles = new Map<string, string>();

// Mock Headers for consistent behavior
function mockHeaders(entries: [string, string][] = []): { get: (key: string) => string | null } {
const map = new Map(entries);
Expand All @@ -19,8 +21,13 @@ function mockHeaders(entries: [string, string][] = []): { get: (key: string) =>

// Mock fs for disk cache
vi.mock('node:fs/promises', () => ({
readFile: vi.fn().mockRejectedValue(new Error('no cache')),
writeFile: vi.fn().mockResolvedValue(undefined),
readFile: vi.fn(async (path: string) => {
if (!mockDiskFiles.has(path)) throw new Error('no cache');
return mockDiskFiles.get(path);
}),
writeFile: vi.fn(async (path: string, content: string) => {
mockDiskFiles.set(path, content);
}),
mkdir: vi.fn().mockResolvedValue(undefined),
}));

Expand Down Expand Up @@ -59,6 +66,7 @@ describe('API Embedding Provider', () => {
beforeEach(() => {
vi.resetAllMocks();
vi.stubGlobal('fetch', mockFetch);
mockDiskFiles.clear();
process.env = {
...originalEnv,
MEMORIX_EMBEDDING: 'api',
Expand Down Expand Up @@ -106,6 +114,22 @@ describe('API Embedding Provider', () => {
expect(body.dimensions).toBe(512);
});

  // Regression test: the probe-dimension disk cache is keyed by
  // baseUrl + model + requestedDimensions, so dropping the shortened
  // 512-d setting must trigger a fresh probe rather than reusing 512.
  it('should not reuse cached probe dimensions across requested dimension changes', async () => {
    process.env.MEMORIX_EMBEDDING_DIMENSIONS = '512';
    mockFetch.mockResolvedValueOnce(mockEmbeddingResponse([makeVector(512)]));

    const shortenedProvider = await APIEmbeddingProvider.create();
    expect(shortenedProvider.dimensions).toBe(512);

    // Switch back to native dimensions: the 512-d cached probe must not match.
    delete process.env.MEMORIX_EMBEDDING_DIMENSIONS;
    mockFetch.mockResolvedValueOnce(mockEmbeddingResponse([makeVector(1536)]));

    const nativeProvider = await APIEmbeddingProvider.create();

    expect(nativeProvider.dimensions).toBe(1536);
    // Two fetches = two probes; a stale cache hit would have left this at 1.
    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

it('should fall back to LLM API key if embedding key not set', async () => {
delete process.env.MEMORIX_EMBEDDING_API_KEY;
process.env.MEMORIX_LLM_API_KEY = 'llm-key-456';
Expand Down Expand Up @@ -185,6 +209,31 @@ describe('API Embedding Provider', () => {
expect(mockFetch).toHaveBeenCalledTimes(2); // probe + 1 embed only
expect(result1).toEqual(result2);
});

  // Regression test: embedding cache keys include the model in their
  // namespace, so the same input text embedded under a different model
  // must miss the cache and hit the API again.
  it('should namespace cache entries by model config to avoid stale dimension reuse', async () => {
    const smallVec = makeVector(1536);
    mockFetch.mockResolvedValueOnce(mockEmbeddingResponse([smallVec]));
    const smallProvider = await APIEmbeddingProvider.create();

    // Populate the cache for 'shared-text' under the small model.
    const cachedSmallEmbed = makeVector(1536, 0.5);
    mockFetch.mockResolvedValueOnce(mockEmbeddingResponse([cachedSmallEmbed]));
    const firstResult = await smallProvider.embed('shared-text');
    expect(firstResult.length).toBe(1536);

    // Same text, different model: must not reuse the 1536-d cached vector.
    process.env.MEMORIX_EMBEDDING_MODEL = 'text-embedding-3-large';
    const largeProbe = makeVector(3072, 0.2);
    mockFetch.mockResolvedValueOnce(mockEmbeddingResponse([largeProbe], 'text-embedding-3-large'));
    const largeProvider = await APIEmbeddingProvider.create();

    const largeEmbed = makeVector(3072, 0.7);
    mockFetch.mockResolvedValueOnce(mockEmbeddingResponse([largeEmbed], 'text-embedding-3-large'));

    const secondResult = await largeProvider.embed('shared-text');

    expect(secondResult).toEqual(largeEmbed);
    expect(secondResult.length).toBe(3072);
    // Four fetches = 2 probes + 2 embeds; a cross-model cache hit would be 3.
    expect(mockFetch).toHaveBeenCalledTimes(4);
  });
});

describe('batch embed', () => {
Expand Down
Loading
Loading