Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"@anthropic-ai/sdk": "^0.30.0",
"@aws-sdk/client-s3": "^3.1028.0",
"@electric-sql/pglite": "^0.4.4",
"@google/generative-ai": "^0.24.1",
"@modelcontextprotocol/sdk": "^1.0.0",
"gray-matter": "^4.0.3",
"openai": "^4.0.0",
Expand Down
124 changes: 84 additions & 40 deletions src/core/embedding.ts
Original file line number Diff line number Diff line change
@@ -1,94 +1,138 @@
/**
* Embedding Service
* Ported from production Ruby implementation (embedding_service.rb, 190 LOC)
* Embedding Service — multi-provider
*
* OpenAI text-embedding-3-large at 1536 dimensions.
* Provider priority:
* 1. Gemini (GEMINI_API_KEY) — gemini-embedding-001, 1536 dims, free tier
* 2. OpenAI (OPENAI_API_KEY) — text-embedding-3-large, 1536 dims
*
* Both produce 1536-dim vectors so the DB schema is unchanged.
* Retry with exponential backoff (4s base, 120s cap, 5 retries).
* 8000 character input truncation.
*/

import OpenAI from 'openai';
import { GoogleGenerativeAI } from '@google/generative-ai';

const MODEL = 'text-embedding-3-large';
const DIMENSIONS = 1536;   // vector size shared by both providers (DB schema depends on this)
const MAX_CHARS = 8000;    // input texts are truncated to this many characters
const MAX_RETRIES = 5;     // attempts per request before giving up
const BASE_DELAY_MS = 4000;    // backoff starting point (4s)
const MAX_DELAY_MS = 120000;   // backoff ceiling (120s)
const BATCH_SIZE = 100;    // texts per provider round-trip in embedBatch

let client: OpenAI | null = null;

function getClient(): OpenAI {
if (!client) {
client = new OpenAI();
}
return client;
// ─── Provider detection ────────────────────────────────────────────────────
/**
 * Select the embedding provider from the environment.
 * Gemini wins when both keys are present (free tier preferred).
 * @throws when neither GEMINI_API_KEY nor OPENAI_API_KEY is set.
 */
function getProvider(): 'gemini' | 'openai' {
  const hasGemini = Boolean(process.env.GEMINI_API_KEY);
  const hasOpenAI = Boolean(process.env.OPENAI_API_KEY);
  if (!hasGemini && !hasOpenAI) {
    throw new Error(
      'No embedding API key found.\n' +
      'Set GEMINI_API_KEY (free tier) or OPENAI_API_KEY.\n' +
      'Get Gemini key: https://aistudio.google.com/apikey'
    );
  }
  return hasGemini ? 'gemini' : 'openai';
}

export async function embed(text: string): Promise<Float32Array> {
const truncated = text.slice(0, MAX_CHARS);
const result = await embedBatch([truncated]);
return result[0];
// ─── Gemini ────────────────────────────────────────────────────────────────
// Memoized SDK handle; created on first use so merely importing this module
// never touches GEMINI_API_KEY.
let geminiClient: GoogleGenerativeAI | null = null;

/** Lazily construct (and cache) the Gemini SDK client. */
function getGeminiClient(): GoogleGenerativeAI {
  geminiClient ??= new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
  return geminiClient;
}

export async function embedBatch(texts: string[]): Promise<Float32Array[]> {
const truncated = texts.map(t => t.slice(0, MAX_CHARS));
async function embedWithGemini(texts: string[]): Promise<Float32Array[]> {
const client = getGeminiClient();
const model = client.getGenerativeModel({ model: 'gemini-embedding-001' });
const results: Float32Array[] = [];

// Process in batches of BATCH_SIZE
for (let i = 0; i < truncated.length; i += BATCH_SIZE) {
const batch = truncated.slice(i, i + BATCH_SIZE);
const batchResults = await embedBatchWithRetry(batch);
results.push(...batchResults);
// Gemini doesn't support batch embed, process one by one
for (const text of texts) {
for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
try {
const result = await model.embedContent({
content: { role: 'user', parts: [{ text }] },
taskType: 'RETRIEVAL_DOCUMENT' as any,
});
// gemini-embedding-001 returns 3072 dims by default; slice to 1536
const values = result.embedding.values.slice(0, DIMENSIONS);
results.push(new Float32Array(values));
break;
} catch (e: unknown) {
if (attempt === MAX_RETRIES - 1) throw e;
await sleep(exponentialDelay(attempt));
}
}
}

return results;
}

async function embedBatchWithRetry(texts: string[]): Promise<Float32Array[]> {
// ─── OpenAI ───────────────────────────────────────────────────────────────
// Memoized SDK handle; the OpenAI constructor reads OPENAI_API_KEY itself.
let openaiClient: OpenAI | null = null;

/** Lazily construct (and cache) the OpenAI SDK client. */
function getOpenAIClient(): OpenAI {
  openaiClient ??= new OpenAI();
  return openaiClient;
}

async function embedWithOpenAI(texts: string[]): Promise<Float32Array[]> {
for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
try {
const response = await getClient().embeddings.create({
model: MODEL,
const response = await getOpenAIClient().embeddings.create({
model: 'text-embedding-3-large',
input: texts,
dimensions: DIMENSIONS,
});

// Sort by index to maintain order
const sorted = response.data.sort((a, b) => a.index - b.index);
return sorted.map(d => new Float32Array(d.embedding));
} catch (e: unknown) {
if (attempt === MAX_RETRIES - 1) throw e;

// Check for rate limit with Retry-After header
let delay = exponentialDelay(attempt);

if (e instanceof OpenAI.APIError && e.status === 429) {
const retryAfter = e.headers?.['retry-after'];
if (retryAfter) {
const parsed = parseInt(retryAfter, 10);
if (!isNaN(parsed)) {
delay = parsed * 1000;
}
if (!isNaN(parsed)) delay = parsed * 1000;
}
}

await sleep(delay);
}
}
throw new Error('OpenAI embedding failed after all retries');
}

// ─── Public API ───────────────────────────────────────────────────────────
/** Embed a single text (truncated to MAX_CHARS); returns one 1536-dim vector. */
export async function embed(text: string): Promise<Float32Array> {
  const [vector] = await embedBatch([text.slice(0, MAX_CHARS)]);
  return vector;
}

/**
 * Embed many texts through whichever provider getProvider() selects.
 *
 * Inputs are truncated to MAX_CHARS and processed in chunks of BATCH_SIZE.
 *
 * @param texts raw input strings (may exceed MAX_CHARS; truncated here)
 * @returns one 1536-dim vector per input, in input order
 * @throws if no API key is configured, or a chunk exhausts its retries
 */
export async function embedBatch(texts: string[]): Promise<Float32Array[]> {
  const truncated = texts.map(t => t.slice(0, MAX_CHARS));
  const provider = getProvider();
  const results: Float32Array[] = [];

  for (let i = 0; i < truncated.length; i += BATCH_SIZE) {
    const batch = truncated.slice(i, i + BATCH_SIZE);
    const batchResults = provider === 'gemini'
      ? await embedWithGemini(batch)
      : await embedWithOpenAI(batch);
    results.push(...batchResults);
  }
  return results;
}

// ─── Helpers ──────────────────────────────────────────────────────────────
/** Backoff for the given 0-based attempt: BASE_DELAY_MS * 2^attempt, capped at MAX_DELAY_MS. */
function exponentialDelay(attempt: number): number {
  return Math.min(BASE_DELAY_MS * Math.pow(2, attempt), MAX_DELAY_MS);
}

/** Promise-based delay used by the retry loops. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>(resolve => {
    setTimeout(() => resolve(), ms);
  });
}

// Exported metadata for callers/logging. The model label covers both
// providers since either may have produced a given vector.
export const EMBEDDING_MODEL = 'gemini-embedding-001 / text-embedding-3-large';
export const EMBEDDING_DIMENSIONS = DIMENSIONS;
4 changes: 2 additions & 2 deletions src/core/search/hybrid.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ export async function hybridSearch(
// Run keyword search (always available, no API key needed)
const keywordResults = await engine.searchKeyword(query, { limit: limit * 2 });

// Skip vector search entirely if no OpenAI key is configured
if (!process.env.OPENAI_API_KEY) {
// Skip vector search entirely if no embedding key is configured
if (!process.env.OPENAI_API_KEY && !process.env.GEMINI_API_KEY) {
return dedupResults(keywordResults).slice(0, limit);
}

Expand Down