From bd5ab1be6eaecbef5fe21cca0bcd133265225865 Mon Sep 17 00:00:00 2001
From: Ivan
Date: Sun, 12 Apr 2026 14:29:41 +0800
Subject: [PATCH 1/2] feat: add Gemini embedding support as free alternative to OpenAI

- embedding.ts: auto-detect GEMINI_API_KEY or OPENAI_API_KEY
- hybrid.ts: fix key check to include GEMINI_API_KEY
- gemini-embedding-001 truncated to 1536 dims and L2-normalized,
  no schema changes needed
- EMBEDDING_MODEL reports the active provider's model name
- Free tier: 1500 req/day, sufficient for personal knowledge bases

Tested on 48-page knowledge base. Cross-language search works:
English query finds Chinese notes via semantic similarity.
---
Review notes (v2; text here is ignored by `git am`):
- Truncated Gemini vectors are now L2-normalized: only the full 3072-dim
  output is unit-length, so a bare slice(0, 1536) is not.
- TaskType.RETRIEVAL_DOCUMENT replaces the `'RETRIEVAL_DOCUMENT' as any` cast.
- EMBEDDING_MODEL resolves to the active provider's model instead of the
  combined label 'gemini-embedding-001 / text-embedding-3-large'; OpenAI-only
  setups get the pre-patch value back. Model names are hoisted into constants.
- Hunk headers and diffstat were recomputed by hand; the commit and blob
  hashes are stale. Regenerate with `git format-patch` after applying.
- Open: when both keys are set Gemini wins, so an index built with OpenAI
  vectors would get Gemini vectors mixed in. Consider an explicit override.
- Open: every text is embedded as RETRIEVAL_DOCUMENT, search queries included.
- Open: PATCH 1/2 imports @google/generative-ai before PATCH 2/2 adds it, so
  the tree does not build between the two commits. Consider swapping them.

 src/core/embedding.ts     | 135 +++++++++++++++++++++++++++-----------
 src/core/search/hybrid.ts |   4 +-
 2 files changed, 97 insertions(+), 42 deletions(-)

diff --git a/src/core/embedding.ts b/src/core/embedding.ts
index 4689ccd1..80c5475e 100644
--- a/src/core/embedding.ts
+++ b/src/core/embedding.ts
@@ -1,15 +1,21 @@
 /**
- * Embedding Service
- * Ported from production Ruby implementation (embedding_service.rb, 190 LOC)
+ * Embedding Service — multi-provider
  *
- * OpenAI text-embedding-3-large at 1536 dimensions.
+ * Provider priority:
+ *   1. Gemini (GEMINI_API_KEY) — gemini-embedding-001, 1536 dims, free tier
+ *   2. OpenAI (OPENAI_API_KEY) — text-embedding-3-large, 1536 dims
+ *
+ * Both yield 1536-dim vectors (Gemini's default 3072-dim output is truncated
+ * and L2-normalized), so the DB schema is unchanged.
  * Retry with exponential backoff (4s base, 120s cap, 5 retries).
  * 8000 character input truncation.
  */
 
 import OpenAI from 'openai';
+import { GoogleGenerativeAI, TaskType } from '@google/generative-ai';
 
-const MODEL = 'text-embedding-3-large';
+const GEMINI_MODEL = 'gemini-embedding-001';
+const OPENAI_MODEL = 'text-embedding-3-large';
 const DIMENSIONS = 1536;
 const MAX_CHARS = 8000;
 const MAX_RETRIES = 5;
@@ -17,78 +23,127 @@ const BASE_DELAY_MS = 4000;
 const MAX_DELAY_MS = 120000;
 const BATCH_SIZE = 100;
 
-let client: OpenAI | null = null;
-
-function getClient(): OpenAI {
-  if (!client) {
-    client = new OpenAI();
-  }
-  return client;
+// ─── Provider detection ────────────────────────────────────────────────────
+function getProvider(): 'gemini' | 'openai' {
+  if (process.env.GEMINI_API_KEY) return 'gemini';
+  if (process.env.OPENAI_API_KEY) return 'openai';
+  throw new Error(
+    'No embedding API key found.\n' +
+    'Set GEMINI_API_KEY (free tier) or OPENAI_API_KEY.\n' +
+    'Get Gemini key: https://aistudio.google.com/apikey'
+  );
 }
 
-export async function embed(text: string): Promise<Float32Array> {
-  const truncated = text.slice(0, MAX_CHARS);
-  const result = await embedBatch([truncated]);
-  return result[0];
+// ─── Gemini ────────────────────────────────────────────────────────────────
+let geminiClient: GoogleGenerativeAI | null = null;
+
+function getGeminiClient(): GoogleGenerativeAI {
+  if (!geminiClient) {
+    geminiClient = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
+  }
+  return geminiClient;
 }
 
-export async function embedBatch(texts: string[]): Promise<Float32Array[]> {
-  const truncated = texts.map(t => t.slice(0, MAX_CHARS));
+async function embedWithGemini(texts: string[]): Promise<Float32Array[]> {
+  const client = getGeminiClient();
+  const model = client.getGenerativeModel({ model: GEMINI_MODEL });
   const results: Float32Array[] = [];
 
-  // Process in batches of BATCH_SIZE
-  for (let i = 0; i < truncated.length; i += BATCH_SIZE) {
-    const batch = truncated.slice(i, i + BATCH_SIZE);
-    const batchResults = await embedBatchWithRetry(batch);
-    results.push(...batchResults);
+  // One embedContent request per text, each with its own retry budget
+  for (const text of texts) {
+    for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
+      try {
+        const result = await model.embedContent({
+          content: { role: 'user', parts: [{ text }] },
+          taskType: TaskType.RETRIEVAL_DOCUMENT,
+        });
+        // gemini-embedding-001 returns 3072 dims by default; slice to 1536.
+        // Only the full 3072-dim vector is unit-length, so re-normalize.
+        const values = result.embedding.values.slice(0, DIMENSIONS);
+        results.push(l2Normalize(new Float32Array(values)));
+        break;
+      } catch (e: unknown) {
+        if (attempt === MAX_RETRIES - 1) throw e;
+        await sleep(exponentialDelay(attempt));
+      }
+    }
   }
-
   return results;
 }
 
-async function embedBatchWithRetry(texts: string[]): Promise<Float32Array[]> {
+// ─── OpenAI ───────────────────────────────────────────────────────────────
+let openaiClient: OpenAI | null = null;
+
+function getOpenAIClient(): OpenAI {
+  if (!openaiClient) {
+    openaiClient = new OpenAI();
+  }
+  return openaiClient;
+}
+
+async function embedWithOpenAI(texts: string[]): Promise<Float32Array[]> {
   for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
     try {
-      const response = await getClient().embeddings.create({
-        model: MODEL,
+      const response = await getOpenAIClient().embeddings.create({
+        model: OPENAI_MODEL,
         input: texts,
         dimensions: DIMENSIONS,
       });
-
-      // Sort by index to maintain order
       const sorted = response.data.sort((a, b) => a.index - b.index);
       return sorted.map(d => new Float32Array(d.embedding));
     } catch (e: unknown) {
       if (attempt === MAX_RETRIES - 1) throw e;
-
-      // Check for rate limit with Retry-After header
       let delay = exponentialDelay(attempt);
-
       if (e instanceof OpenAI.APIError && e.status === 429) {
         const retryAfter = e.headers?.['retry-after'];
         if (retryAfter) {
           const parsed = parseInt(retryAfter, 10);
-          if (!isNaN(parsed)) {
-            delay = parsed * 1000;
-          }
+          if (!isNaN(parsed)) delay = parsed * 1000;
         }
       }
-
       await sleep(delay);
     }
   }
+  throw new Error('OpenAI embedding failed after all retries');
+}
+
+// ─── Public API ───────────────────────────────────────────────────────────
+export async function embed(text: string): Promise<Float32Array> {
+  const truncated = text.slice(0, MAX_CHARS);
+  const result = await embedBatch([truncated]);
+  return result[0];
+}
+
+export async function embedBatch(texts: string[]): Promise<Float32Array[]> {
+  const truncated = texts.map(t => t.slice(0, MAX_CHARS));
+  const provider = getProvider();
+  const results: Float32Array[] = [];
 
-  // Should not reach here
-  throw new Error('Embedding failed after all retries');
+  for (let i = 0; i < truncated.length; i += BATCH_SIZE) {
+    const batch = truncated.slice(i, i + BATCH_SIZE);
+    const batchResults = provider === 'gemini'
+      ? await embedWithGemini(batch)
+      : await embedWithOpenAI(batch);
+    results.push(...batchResults);
+  }
+  return results;
 }
 
+// ─── Helpers ──────────────────────────────────────────────────────────────
 function exponentialDelay(attempt: number): number {
-  const delay = BASE_DELAY_MS * Math.pow(2, attempt);
-  return Math.min(delay, MAX_DELAY_MS);
+  return Math.min(BASE_DELAY_MS * Math.pow(2, attempt), MAX_DELAY_MS);
 }
 
 function sleep(ms: number): Promise<void> {
   return new Promise(resolve => setTimeout(resolve, ms));
 }
 
-export { MODEL as EMBEDDING_MODEL, DIMENSIONS as EMBEDDING_DIMENSIONS };
+/** Returns `vec` scaled to unit L2 norm (an all-zero vector is returned as-is). */
+function l2Normalize(vec: Float32Array): Float32Array {
+  const norm = Math.sqrt(vec.reduce((acc, v) => acc + v * v, 0));
+  return norm === 0 ? vec : vec.map(v => v / norm);
+}
+
+// Same priority as getProvider(), but never throws at import time.
+export const EMBEDDING_MODEL = process.env.GEMINI_API_KEY ? GEMINI_MODEL : OPENAI_MODEL;
+export const EMBEDDING_DIMENSIONS = DIMENSIONS;
diff --git a/src/core/search/hybrid.ts b/src/core/search/hybrid.ts
index f31a09d5..9e406b51 100644
--- a/src/core/search/hybrid.ts
+++ b/src/core/search/hybrid.ts
@@ -28,8 +28,8 @@ export async function hybridSearch(
   // Run keyword search (always available, no API key needed)
   const keywordResults = await engine.searchKeyword(query, { limit: limit * 2 });
 
-  // Skip vector search entirely if no OpenAI key is configured
-  if (!process.env.OPENAI_API_KEY) {
+  // Skip vector search entirely if no embedding key is configured
+  if (!process.env.OPENAI_API_KEY && !process.env.GEMINI_API_KEY) {
     return dedupResults(keywordResults).slice(0, limit);
   }
 

From d8e7512126a60d8302e70073742d03765bf6c5a3 Mon Sep 17 00:00:00 2001
From: Ivan
Date: Mon, 13 Apr 2026 09:53:45 +0800
Subject: [PATCH 2/2] deps: add @google/generative-ai for Gemini embedding support

Runtime dependency required by the GEMINI_API_KEY auto-detection path.
Free tier: 1500 req/day via aistudio.google.com

Co-Authored-By: Claude Sonnet 4.6
---
 bun.lock     | 3 +++
 package.json | 1 +
 2 files changed, 4 insertions(+)

diff --git a/bun.lock b/bun.lock
index a7af293f..cd598559 100644
--- a/bun.lock
+++ b/bun.lock
@@ -8,6 +8,7 @@
         "@anthropic-ai/sdk": "^0.30.0",
         "@aws-sdk/client-s3": "^3.1028.0",
         "@electric-sql/pglite": "^0.4.4",
+        "@google/generative-ai": "^0.24.1",
         "@modelcontextprotocol/sdk": "^1.0.0",
         "gray-matter": "^4.0.3",
         "openai": "^4.0.0",
@@ -104,6 +105,8 @@
 
     "@electric-sql/pglite": ["@electric-sql/pglite@0.4.4", "", {}, "sha512-g/6CWAJ4XOkObWCWAQ2IReZD8VvsDy3poRHSKvpRR2F96F8WJ3HVbjpso3gN7l0q6QPPgvxSSpl/qo5k8a7mkQ=="],
 
+    "@google/generative-ai": ["@google/generative-ai@0.24.1", "", {}, "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q=="],
+
     "@hono/node-server": ["@hono/node-server@1.19.12", "", { "peerDependencies": { "hono": "^4" } }, "sha512-txsUW4SQ1iilgE0l9/e9VQWmELXifEFvmdA1j6WFh/aFPj99hIntrSsq/if0UWyGVkmrRPKA1wCeP+UCr1B9Uw=="],
 
     "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.29.0", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ=="],
diff --git a/package.json b/package.json
index 4c048712..7c9ad861 100644
--- a/package.json
+++ b/package.json
@@ -33,6 +33,7 @@
     "@anthropic-ai/sdk": "^0.30.0",
     "@aws-sdk/client-s3": "^3.1028.0",
     "@electric-sql/pglite": "^0.4.4",
+    "@google/generative-ai": "^0.24.1",
     "@modelcontextprotocol/sdk": "^1.0.0",
     "gray-matter": "^4.0.3",
     "openai": "^4.0.0",