Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"@anthropic-ai/sdk": "^0.30.0",
"@aws-sdk/client-s3": "^3.1028.0",
"@electric-sql/pglite": "^0.4.4",
"@google/generative-ai": "^0.24.1",
"@modelcontextprotocol/sdk": "^1.0.0",
"gray-matter": "^4.0.3",
"openai": "^4.0.0",
Expand Down
124 changes: 84 additions & 40 deletions src/core/embedding.ts
Original file line number Diff line number Diff line change
@@ -1,94 +1,138 @@
/**
* Embedding Service
* Ported from production Ruby implementation (embedding_service.rb, 190 LOC)
* Embedding Service — multi-provider
*
* OpenAI text-embedding-3-large at 1536 dimensions.
* Provider priority:
* 1. Gemini (GEMINI_API_KEY) — gemini-embedding-001, 1536 dims, free tier
* 2. OpenAI (OPENAI_API_KEY) — text-embedding-3-large, 1536 dims
*
* Both produce 1536-dim vectors so the DB schema is unchanged.
* Retry with exponential backoff (4s base, 120s cap, 5 retries).
* 8000 character input truncation.
*/

import OpenAI from 'openai';
import { GoogleGenerativeAI } from '@google/generative-ai';

const MODEL = 'text-embedding-3-large';
const DIMENSIONS = 1536;   // vector size shared by both providers (DB schema depends on this)
const MAX_CHARS = 8000;    // input texts are truncated to this many characters
const MAX_RETRIES = 5;     // attempts per request before giving up
const BASE_DELAY_MS = 4000;    // backoff starting point (4s)
const MAX_DELAY_MS = 120000;   // backoff ceiling (120s)
const BATCH_SIZE = 100;    // texts per provider round-trip in embedBatch

let client: OpenAI | null = null;

function getClient(): OpenAI {
if (!client) {
client = new OpenAI();
}
return client;
// ─── Provider detection ────────────────────────────────────────────────────
/**
 * Select the embedding provider from the environment.
 * Gemini wins when both keys are present (free tier preferred).
 * @throws when neither GEMINI_API_KEY nor OPENAI_API_KEY is set.
 */
function getProvider(): 'gemini' | 'openai' {
  const hasGemini = Boolean(process.env.GEMINI_API_KEY);
  const hasOpenAI = Boolean(process.env.OPENAI_API_KEY);
  if (!hasGemini && !hasOpenAI) {
    throw new Error(
      'No embedding API key found.\n' +
      'Set GEMINI_API_KEY (free tier) or OPENAI_API_KEY.\n' +
      'Get Gemini key: https://aistudio.google.com/apikey'
    );
  }
  return hasGemini ? 'gemini' : 'openai';
}

export async function embed(text: string): Promise<Float32Array> {
const truncated = text.slice(0, MAX_CHARS);
const result = await embedBatch([truncated]);
return result[0];
// ─── Gemini ────────────────────────────────────────────────────────────────
// Memoized SDK handle; created on first use so merely importing this module
// never touches GEMINI_API_KEY.
let geminiClient: GoogleGenerativeAI | null = null;

/** Lazily construct (and cache) the Gemini SDK client. */
function getGeminiClient(): GoogleGenerativeAI {
  geminiClient ??= new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
  return geminiClient;
}

export async function embedBatch(texts: string[]): Promise<Float32Array[]> {
const truncated = texts.map(t => t.slice(0, MAX_CHARS));
async function embedWithGemini(texts: string[]): Promise<Float32Array[]> {
const client = getGeminiClient();
const model = client.getGenerativeModel({ model: 'gemini-embedding-001' });
const results: Float32Array[] = [];

// Process in batches of BATCH_SIZE
for (let i = 0; i < truncated.length; i += BATCH_SIZE) {
const batch = truncated.slice(i, i + BATCH_SIZE);
const batchResults = await embedBatchWithRetry(batch);
results.push(...batchResults);
// Gemini doesn't support batch embed, process one by one
for (const text of texts) {
for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
try {
const result = await model.embedContent({
content: { role: 'user', parts: [{ text }] },
taskType: 'RETRIEVAL_DOCUMENT' as any,
});
// gemini-embedding-001 returns 3072 dims by default; slice to 1536
const values = result.embedding.values.slice(0, DIMENSIONS);
results.push(new Float32Array(values));
break;
} catch (e: unknown) {
if (attempt === MAX_RETRIES - 1) throw e;
await sleep(exponentialDelay(attempt));
}
}
}

return results;
}

async function embedBatchWithRetry(texts: string[]): Promise<Float32Array[]> {
// ─── OpenAI ───────────────────────────────────────────────────────────────
// Memoized SDK handle; the OpenAI constructor reads OPENAI_API_KEY itself.
let openaiClient: OpenAI | null = null;

/** Lazily construct (and cache) the OpenAI SDK client. */
function getOpenAIClient(): OpenAI {
  openaiClient ??= new OpenAI();
  return openaiClient;
}

async function embedWithOpenAI(texts: string[]): Promise<Float32Array[]> {
for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
try {
const response = await getClient().embeddings.create({
model: MODEL,
const response = await getOpenAIClient().embeddings.create({
model: 'text-embedding-3-large',
input: texts,
dimensions: DIMENSIONS,
});

// Sort by index to maintain order
const sorted = response.data.sort((a, b) => a.index - b.index);
return sorted.map(d => new Float32Array(d.embedding));
} catch (e: unknown) {
if (attempt === MAX_RETRIES - 1) throw e;

// Check for rate limit with Retry-After header
let delay = exponentialDelay(attempt);

if (e instanceof OpenAI.APIError && e.status === 429) {
const retryAfter = e.headers?.['retry-after'];
if (retryAfter) {
const parsed = parseInt(retryAfter, 10);
if (!isNaN(parsed)) {
delay = parsed * 1000;
}
if (!isNaN(parsed)) delay = parsed * 1000;
}
}

await sleep(delay);
}
}
throw new Error('OpenAI embedding failed after all retries');
}

// ─── Public API ───────────────────────────────────────────────────────────
/** Embed a single text (truncated to MAX_CHARS); returns one 1536-dim vector. */
export async function embed(text: string): Promise<Float32Array> {
  const [vector] = await embedBatch([text.slice(0, MAX_CHARS)]);
  return vector;
}

/**
 * Embed many texts through whichever provider getProvider() selects.
 *
 * Inputs are truncated to MAX_CHARS and processed in chunks of BATCH_SIZE.
 *
 * @param texts raw input strings (may exceed MAX_CHARS; truncated here)
 * @returns one 1536-dim vector per input, in input order
 * @throws if no API key is configured, or a chunk exhausts its retries
 */
export async function embedBatch(texts: string[]): Promise<Float32Array[]> {
  const truncated = texts.map(t => t.slice(0, MAX_CHARS));
  const provider = getProvider();
  const results: Float32Array[] = [];

  for (let i = 0; i < truncated.length; i += BATCH_SIZE) {
    const batch = truncated.slice(i, i + BATCH_SIZE);
    const batchResults = provider === 'gemini'
      ? await embedWithGemini(batch)
      : await embedWithOpenAI(batch);
    results.push(...batchResults);
  }
  return results;
}

// ─── Helpers ──────────────────────────────────────────────────────────────
/** Backoff for the given 0-based attempt: BASE_DELAY_MS * 2^attempt, capped at MAX_DELAY_MS. */
function exponentialDelay(attempt: number): number {
  return Math.min(BASE_DELAY_MS * Math.pow(2, attempt), MAX_DELAY_MS);
}

/** Promise-based delay used by the retry loops. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>(resolve => {
    setTimeout(() => resolve(), ms);
  });
}

// Exported metadata for callers/logging. The model label covers both
// providers since either may have produced a given vector.
export const EMBEDDING_MODEL = 'gemini-embedding-001 / text-embedding-3-large';
export const EMBEDDING_DIMENSIONS = DIMENSIONS;
4 changes: 2 additions & 2 deletions src/core/search/hybrid.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ export async function hybridSearch(
// Run keyword search (always available, no API key needed)
const keywordResults = await engine.searchKeyword(query, { limit: limit * 2 });

// Skip vector search entirely if no OpenAI key is configured
if (!process.env.OPENAI_API_KEY) {
// Skip vector search entirely if no embedding key is configured
if (!process.env.OPENAI_API_KEY && !process.env.GEMINI_API_KEY) {
return dedupResults(keywordResults).slice(0, limit);
}

Expand Down