From fba48b03673bcfddeb38f9aaf821717080080343 Mon Sep 17 00:00:00 2001 From: Ravi Tharuma Date: Fri, 27 Mar 2026 14:48:50 +0100 Subject: [PATCH] Fix API embedding fallback and cache isolation --- CHANGELOG.md | 6 ++ docs/CONFIGURATION.md | 12 +++ src/embedding/api-provider.ts | 120 ++++++++++++++++++++++++--- src/embedding/provider.ts | 38 ++++++++- tests/embedding/api-provider.test.ts | 53 +++++++++++- tests/embedding/provider.test.ts | 32 +++++++ 6 files changed, 246 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40b5d7b..26e6362 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All notable changes to this project will be documented in this file. +## [Unreleased] + +### Fixed +- **API-first auto embedding selection** — `MEMORIX_EMBEDDING=auto` now prefers a configured remote embedding API before falling back to local `fastembed` or `transformers`, preventing unexpected local-model activation when API credentials are already present. +- **Embedding cache isolation across config changes** — API embedding cache keys and probe-dimension metadata now stay isolated per `baseUrl + model + requestedDimensions`, so switching between shortened and native dimensions no longer reuses stale cached embeddings or stale dimension probes. + ## [1.0.5] - 2026-03-24 ### Added diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index faae669..0c6d158 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -157,6 +157,18 @@ Common values: - `transformers` - `auto` +`auto` now prefers a configured remote embedding API first. + +- if `MEMORIX_EMBEDDING_API_KEY` or another supported API key is present, Memorix will use the remote `/v1/embeddings` provider first +- only if API embedding is unavailable will it fall back to local `fastembed`, then `transformers` +- this keeps semantic search on the API path by default while preserving local fallback behavior + +When using API embeddings with optional dimension shortening: + +- `MEMORIX_EMBEDDING_DIMENSIONS` is treated as part of the embedding configuration identity +- Memorix keeps API embedding cache entries and probed dimension metadata isolated per `baseUrl + model + requestedDimensions` +- changing from shortened dimensions back to native dimensions no longer reuses stale cached vectors or stale probe results + ### `git` Controls Git-Memory behavior. diff --git a/src/embedding/api-provider.ts b/src/embedding/api-provider.ts index a3184c4..87b04b0 100644 --- a/src/embedding/api-provider.ts +++ b/src/embedding/api-provider.ts @@ -35,8 +35,17 @@ function normalizeText(text: string): string { return text.replace(/\s+/g, ' ').trim().slice(0, MAX_INPUT_CHARS); } -function textHash(text: string): string { - return createHash('sha256').update(text).digest('hex').slice(0, 16); +function cacheNamespace(config: Pick): string { + return [ + 'v2', + config.baseUrl.replace(/\/+$/, ''), + config.model, + config.requestedDimensions ?? 'native', + ].join('|'); +} + +function textHash(text: string, namespace: string): string { + return createHash('sha256').update(`${namespace}\u0000${text}`).digest('hex').slice(0, 16); } async function loadDiskCache(): Promise { @@ -65,12 +74,50 @@ async function ensureDiskCacheLoaded(): Promise { await loadDiskCache(); } +function dimsCacheKey(config: Pick): string { + return [ + config.baseUrl.replace(/\/+$/, ''), + config.model, + config.requestedDimensions ?? 'native', + ].join('|'); +} + /** Load cached probe dimensions from disk. Returns null if not cached. */ -async function loadCachedDims(model: string, baseUrl: string): Promise { +async function loadCachedDims(config: Pick): Promise { try { const raw = await readFile(DIMS_CACHE_FILE, 'utf-8'); const data = JSON.parse(raw); - if (data.model === model && data.baseUrl === baseUrl && typeof data.dimensions === 'number') { + + const key = dimsCacheKey(config); + + if (Array.isArray(data.entries)) { + const entry = data.entries.find((candidate: unknown) => + typeof candidate === 'object' && + candidate !== null && + 'key' in candidate && + 'dimensions' in candidate && + (candidate as { key?: string }).key === key && + typeof (candidate as { dimensions?: unknown }).dimensions === 'number', + ) as { dimensions: number } | undefined; + if (entry) return entry.dimensions; + } + + if ( + data.baseUrl === config.baseUrl && + data.model === config.model && + typeof data.dimensions === 'number' && + (data.requestedDimensions ?? null) === (config.requestedDimensions ?? null) + ) { + return data.dimensions; + } + + if ( + data.baseUrl === config.baseUrl && + data.model === config.model && + typeof data.dimensions === 'number' && + (config.requestedDimensions ?? null) === null && + !('requestedDimensions' in data) + ) { return data.dimensions; } } catch { /* no cache or corrupt */ } @@ -78,10 +125,59 @@ async function loadCachedDims(model: string, baseUrl: string): Promise { +async function saveCachedDims(config: Pick, dimensions: number): Promise { try { await mkdir(CACHE_DIR, { recursive: true }); - await writeFile(DIMS_CACHE_FILE, JSON.stringify({ model, baseUrl, dimensions, ts: Date.now() })); + const key = dimsCacheKey(config); + let entries: Array<{ key: string; baseUrl: string; model: string; requestedDimensions: number | null; dimensions: number; ts: number }> = []; + + try { + const raw = await readFile(DIMS_CACHE_FILE, 'utf-8'); + const data = JSON.parse(raw); + if (Array.isArray(data.entries)) { + entries = data.entries.filter((entry: unknown) => + typeof entry === 'object' && + entry !== null && + 'key' in entry && + typeof (entry as { key?: unknown }).key === 'string', + ) as typeof entries; + } else if ( + data && + typeof data === 'object' && + typeof data.baseUrl === 'string' && + typeof data.model === 'string' && + typeof data.dimensions === 'number' + ) { + entries = [{ + key: dimsCacheKey({ + baseUrl: data.baseUrl, + model: data.model, + requestedDimensions: data.requestedDimensions ?? null, + }), + baseUrl: data.baseUrl, + model: data.model, + requestedDimensions: data.requestedDimensions ?? null, + dimensions: data.dimensions, + ts: typeof data.ts === 'number' ? data.ts : Date.now(), + }]; + } + } catch { + // no existing cache + } + + const nextEntry = { + key, + baseUrl: config.baseUrl, + model: config.model, + requestedDimensions: config.requestedDimensions ?? null, + dimensions, + ts: Date.now(), + }; + + entries = entries.filter((entry) => entry.key !== key); + entries.push(nextEntry); + + await writeFile(DIMS_CACHE_FILE, JSON.stringify({ entries })); } catch { /* best-effort */ } } @@ -161,11 +257,13 @@ export class APIEmbeddingProvider implements EmbeddingProvider { readonly dimensions: number; private config: APIEmbeddingConfig; + private readonly cacheKeyNamespace: string; private totalTokensUsed = 0; private totalApiCalls = 0; private constructor(config: APIEmbeddingConfig, detectedDimensions: number) { this.config = config; + this.cacheKeyNamespace = cacheNamespace(config); this.dimensions = detectedDimensions; this.name = `api-${config.model.replace(/\//g, '-')}`; } @@ -178,14 +276,14 @@ export class APIEmbeddingProvider implements EmbeddingProvider { startDiskCacheLoad(); // Try cached dimensions first to avoid a network probe on cold start - let probeDimensions = await loadCachedDims(config.model, config.baseUrl); + let probeDimensions = await loadCachedDims(config); if (probeDimensions !== null) { console.error(`[memorix] API embedding: ${config.model} @ ${config.baseUrl} (${probeDimensions}d) [cached dims]`); } else { probeDimensions = await APIEmbeddingProvider.probeAPI(config); console.error(`[memorix] API embedding: ${config.model} @ ${config.baseUrl} (${probeDimensions}d)`); // Persist for next cold start - saveCachedDims(config.model, config.baseUrl, probeDimensions).catch(() => {}); + saveCachedDims(config, probeDimensions).catch(() => {}); } if (config.requestedDimensions) { console.error(`[memorix] Dimension shortening: ${config.requestedDimensions}d requested`); @@ -256,7 +354,7 @@ export class APIEmbeddingProvider implements EmbeddingProvider { async embed(text: string): Promise { const normalized = normalizeText(text); - const hash = textHash(normalized); + const hash = textHash(normalized, this.cacheKeyNamespace); // Fast path: cache already loaded (warm process) — instant lookup if (diskCacheLoaded) { @@ -331,7 +429,7 @@ export class APIEmbeddingProvider implements EmbeddingProvider { const uncachedTexts: string[] = []; for (let i = 0; i < normalizedTexts.length; i++) { - const hash = textHash(normalizedTexts[i]); + const hash = textHash(normalizedTexts[i], this.cacheKeyNamespace); const cached = cache.get(hash); if (cached) { results[i] = cached; @@ -371,7 +469,7 @@ export class APIEmbeddingProvider implements EmbeddingProvider { for (const item of response.data) { const originalIdx = chunkIndices[item.index]; results[originalIdx] = item.embedding; - cacheSet(textHash(normalizedTexts[originalIdx]), item.embedding); + cacheSet(textHash(normalizedTexts[originalIdx], this.cacheKeyNamespace), item.embedding); } } catch (error) { const providerLimit = parseBatchLimit(error); diff --git a/src/embedding/provider.ts b/src/embedding/provider.ts index 822e23a..eb6012d 100644 --- a/src/embedding/provider.ts +++ b/src/embedding/provider.ts @@ -8,7 +8,7 @@ * - MEMORIX_EMBEDDING=fastembed → local ONNX inference (384-dim bge-small, ~300MB RAM) * - MEMORIX_EMBEDDING=transformers → pure JS WASM inference (384-dim MiniLM, ~500MB RAM) * - MEMORIX_EMBEDDING=api → remote API via OpenAI-compatible /v1/embeddings (zero local RAM) - * - MEMORIX_EMBEDDING=auto → try fastembed → transformers → off (legacy behavior) + * - MEMORIX_EMBEDDING=auto → try configured API → fastembed → transformers → off * * API mode env vars (MEMORIX_EMBEDDING=api): * - MEMORIX_EMBEDDING_API_KEY → API key (fallback: MEMORIX_LLM_API_KEY → OPENAI_API_KEY) @@ -66,6 +66,29 @@ function getEmbeddingMode(): 'off' | 'fastembed' | 'transformers' | 'api' | 'aut } } +function hasAPIEmbeddingConfig(): boolean { + try { + const { + getEmbeddingApiKey, + getEmbeddingBaseUrl, + getEmbeddingModel, + } = require('../config.js'); + + return Boolean( + getEmbeddingApiKey?.() && + getEmbeddingBaseUrl?.() && + getEmbeddingModel?.(), + ); + } catch { + return Boolean( + process.env.MEMORIX_EMBEDDING_API_KEY || + process.env.MEMORIX_API_KEY || + process.env.MEMORIX_LLM_API_KEY || + process.env.OPENAI_API_KEY, + ); + } +} + /** Minimum interval between retry attempts after a temporary failure (ms). */ const RETRY_COOLDOWN_MS = 30_000; let lastFailureTimestamp = 0; @@ -149,7 +172,18 @@ export async function getEmbeddingProvider(): Promise } } - // Auto mode: try fastembed → transformers → off (legacy behavior) + // Auto mode: try configured API first, then local fallbacks + if (hasAPIEmbeddingConfig()) { + try { + const { APIEmbeddingProvider } = await import('./api-provider.js'); + provider = await APIEmbeddingProvider.create(); + console.error(`[memorix] Embedding provider: ${provider!.name} (${provider!.dimensions}d)`); + return provider; + } catch (e) { + console.error(`[memorix] API embedding unavailable in auto mode: ${e instanceof Error ? e.message : e}`); + } + } + try { const { FastEmbedProvider } = await import('./fastembed-provider.js'); provider = await FastEmbedProvider.create(); diff --git a/tests/embedding/api-provider.test.ts b/tests/embedding/api-provider.test.ts index a2261d6..606ddf0 100644 --- a/tests/embedding/api-provider.test.ts +++ b/tests/embedding/api-provider.test.ts @@ -11,6 +11,8 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; const mockFetch = vi.fn(); vi.stubGlobal('fetch', mockFetch); +const mockDiskFiles = new Map(); + // Mock Headers for consistent behavior function mockHeaders(entries: [string, string][] = []): { get: (key: string) => string | null } { const map = new Map(entries); @@ -19,8 +21,13 @@ function mockHeaders(entries: [string, string][] = []): { get: (key: string) => // Mock fs for disk cache vi.mock('node:fs/promises', () => ({ - readFile: vi.fn().mockRejectedValue(new Error('no cache')), - writeFile: vi.fn().mockResolvedValue(undefined), + readFile: vi.fn(async (path: string) => { + if (!mockDiskFiles.has(path)) throw new Error('no cache'); + return mockDiskFiles.get(path); + }), + writeFile: vi.fn(async (path: string, content: string) => { + mockDiskFiles.set(path, content); + }), mkdir: vi.fn().mockResolvedValue(undefined), })); @@ -59,6 +66,7 @@ describe('API Embedding Provider', () => { beforeEach(() => { vi.resetAllMocks(); vi.stubGlobal('fetch', mockFetch); + mockDiskFiles.clear(); process.env = { ...originalEnv, MEMORIX_EMBEDDING: 'api', @@ -106,6 +114,22 @@ describe('API Embedding Provider', () => { expect(body.dimensions).toBe(512); }); + it('should not reuse cached probe dimensions across requested dimension changes', async () => { + process.env.MEMORIX_EMBEDDING_DIMENSIONS = '512'; + mockFetch.mockResolvedValueOnce(mockEmbeddingResponse([makeVector(512)])); + + const shortenedProvider = await APIEmbeddingProvider.create(); + expect(shortenedProvider.dimensions).toBe(512); + + delete process.env.MEMORIX_EMBEDDING_DIMENSIONS; + mockFetch.mockResolvedValueOnce(mockEmbeddingResponse([makeVector(1536)])); + + const nativeProvider = await APIEmbeddingProvider.create(); + + expect(nativeProvider.dimensions).toBe(1536); + expect(mockFetch).toHaveBeenCalledTimes(2); + }); + it('should fall back to LLM API key if embedding key not set', async () => { delete process.env.MEMORIX_EMBEDDING_API_KEY; process.env.MEMORIX_LLM_API_KEY = 'llm-key-456'; @@ -185,6 +209,31 @@ describe('API Embedding Provider', () => { expect(mockFetch).toHaveBeenCalledTimes(2); // probe + 1 embed only expect(result1).toEqual(result2); }); + + it('should namespace cache entries by model config to avoid stale dimension reuse', async () => { + const smallVec = makeVector(1536); + mockFetch.mockResolvedValueOnce(mockEmbeddingResponse([smallVec])); + const smallProvider = await APIEmbeddingProvider.create(); + + const cachedSmallEmbed = makeVector(1536, 0.5); + mockFetch.mockResolvedValueOnce(mockEmbeddingResponse([cachedSmallEmbed])); + const firstResult = await smallProvider.embed('shared-text'); + expect(firstResult.length).toBe(1536); + + process.env.MEMORIX_EMBEDDING_MODEL = 'text-embedding-3-large'; + const largeProbe = makeVector(3072, 0.2); + mockFetch.mockResolvedValueOnce(mockEmbeddingResponse([largeProbe], 'text-embedding-3-large')); + const largeProvider = await APIEmbeddingProvider.create(); + + const largeEmbed = makeVector(3072, 0.7); + mockFetch.mockResolvedValueOnce(mockEmbeddingResponse([largeEmbed], 'text-embedding-3-large')); + + const secondResult = await largeProvider.embed('shared-text'); + + expect(secondResult).toEqual(largeEmbed); + expect(secondResult.length).toBe(3072); + expect(mockFetch).toHaveBeenCalledTimes(4); + }); }); describe('batch embed', () => { diff --git a/tests/embedding/provider.test.ts b/tests/embedding/provider.test.ts index 5ac1150..ad8aa11 100644 --- a/tests/embedding/provider.test.ts +++ b/tests/embedding/provider.test.ts @@ -15,8 +15,15 @@ vi.mock('../../src/embedding/fastembed-provider.js', () => { vi.mock('../../src/embedding/transformers-provider.js', () => { throw new Error('transformers not installed (mocked)'); }); +const mockApiProviderCreate = vi.fn(); +vi.mock('../../src/embedding/api-provider.js', () => ({ + APIEmbeddingProvider: { + create: mockApiProviderCreate, + }, +})); import { getEmbeddingProvider, isVectorSearchAvailable, resetProvider } from '../../src/embedding/provider.js'; import { resetDb, isEmbeddingEnabled, generateEmbedding, getDb } from '../../src/store/orama-store.js'; +import { resetConfigCache } from '../../src/config.js'; // Save and clear embedding-related env vars to prevent real API provider initialization const savedEnv: Record = {}; @@ -31,8 +38,10 @@ beforeEach(() => { savedEnv[key] = process.env[key]; delete process.env[key]; } + mockApiProviderCreate.mockReset(); resetProvider(); resetDb(); + resetConfigCache(); }); import { afterEach } from 'vitest'; @@ -42,6 +51,7 @@ afterEach(() => { process.env[key] = savedEnv[key]; } } + resetConfigCache(); }); describe('Embedding Provider', () => { @@ -104,4 +114,26 @@ describe('Embedding Provider', () => { expect(provider).toBeNull(); // still null since providers are mocked out }); }); + + describe('auto mode with API config present', () => { + it('should prefer API embeddings before local fallback providers', async () => { + process.env.MEMORIX_EMBEDDING = 'auto'; + process.env.MEMORIX_EMBEDDING_API_KEY = 'api-key'; + process.env.MEMORIX_EMBEDDING_BASE_URL = 'https://embeddings.example/v1'; + process.env.MEMORIX_EMBEDDING_MODEL = 'text-embedding-3-small'; + + const apiProvider = { + name: 'api-text-embedding-3-small', + dimensions: 1536, + embed: vi.fn(), + embedBatch: vi.fn(), + }; + mockApiProviderCreate.mockResolvedValue(apiProvider); + + const provider = await getEmbeddingProvider(); + + expect(provider).toBe(apiProvider); + expect(mockApiProviderCreate).toHaveBeenCalledTimes(1); + }); + }); });