diff --git a/.env.example b/.env.example index 0dc93bf..825964c 100644 --- a/.env.example +++ b/.env.example @@ -40,9 +40,9 @@ OLLAMA_MODEL=nomic-embed-text # Google AI (required if EMBEDDING_PROVIDER=google) # Get from: https://aistudio.google.com/app/apikey -# Model: text-embedding-004 (768 dimensions) → setup-db-google.sql +# Model: gemini-embedding-2-preview (3072 dimensions) → setup-db-google.sql # GOOGLE_AI_API_KEY=your-google-ai-api-key -# GOOGLE_EMBEDDING_MODEL=text-embedding-004 +# GOOGLE_EMBEDDING_MODEL=gemini-embedding-2-preview # OAuth (optional - enables OAuth 2.0 for Claude/ChatGPT mobile apps) # All four required vars must be set together to enable OAuth. Bearer token auth still works alongside. @@ -75,11 +75,11 @@ ENABLE_MEMORY_EXTRACTION=false ANTHROPIC_API_KEY=sk-ant-your-anthropic-api-key # Extraction Model (fast, cheap model recommended) -# Options: claude-haiku-4-5-20250501, claude-sonnet-4-6-20250514, etc. -EXTRACTION_MODEL=claude-haiku-4-5-20250501 +# Options: claude-haiku-4-5-20251001, claude-sonnet-4-6, etc. +EXTRACTION_MODEL=claude-haiku-4-5-20251001 # Insight Model (used for insight synthesis, benefits from capable reasoning) -# INSIGHT_MODEL=claude-sonnet-4-6-20250514 +# INSIGHT_MODEL=claude-sonnet-4-6 # Response Format # true: Compact responses (40-60% fewer tokens, short keys like "n", "t", "o") diff --git a/CLAUDE.md b/CLAUDE.md index 7cc2bbe..0f0c5b4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -34,7 +34,7 @@ Claude Code reference for Textrawl. - **Knowledge search/documents**: one of - OpenAI 1536d: `scripts/setup-db.sql` - - Google AI 768d: `scripts/setup-db-google.sql` + - Google AI 3072d: `scripts/setup-db-google.sql` - Ollama 1024d: `scripts/setup-db-ollama.sql` - Ollama 768d: `scripts/setup-db-ollama-v2.sql` - **Memory graph**: @@ -46,7 +46,7 @@ Claude Code reference for Textrawl. 
- Ollama 768d: `scripts/setup-db-conversation-ollama-v2.sql` - **Insights**: - OpenAI: `scripts/setup-db-insights.sql` - - Google AI 768d: `scripts/setup-db-insights-google.sql` + - Google AI 3072d: `scripts/setup-db-insights-google.sql` - Ollama 1024d: `scripts/setup-db-insights-ollama.sql` - Ollama 768d: `scripts/setup-db-insights-ollama-v2.sql` - **Security hardening**: `scripts/security-rls.sql` (+ `scripts/security-rls-memory.sql`) diff --git a/README.md b/README.md index d4b4817..d65f529 100644 --- a/README.md +++ b/README.md @@ -176,8 +176,8 @@ pnpm upload -- ./converted/ | `OPENAI_API_KEY` | If OpenAI | For text-embedding-3-small (1536d) | | `OLLAMA_BASE_URL` | If Ollama | Default: `http://localhost:11434` | | `OLLAMA_MODEL` | If Ollama | Default: `nomic-embed-text` | -| `GOOGLE_AI_API_KEY` | If Google | For text-embedding-004 (768d) | -| `GOOGLE_EMBEDDING_MODEL` | If Google | Default: `text-embedding-004` | +| `GOOGLE_AI_API_KEY` | If Google | For gemini-embedding-2-preview (3072d) | +| `GOOGLE_EMBEDDING_MODEL` | If Google | Default: `gemini-embedding-2-preview` | | `API_BEARER_TOKEN` | Prod only | Min 32 chars (`openssl rand -hex 32`) | | `PORT` | No | Default: 3000 | | `LOG_LEVEL` | No | debug, info, warn, error | @@ -187,8 +187,8 @@ pnpm upload -- ./converted/ | `ENABLE_INSIGHTS` | No | Enable proactive insight tools (default: true) | | `ENABLE_MEMORY_EXTRACTION` | No | Enable LLM-based memory extraction (default: false) | | `ANTHROPIC_API_KEY` | If extraction | Required for `extract_memories` tool | -| `EXTRACTION_MODEL` | No | Model for extraction (default: claude-haiku-4-5-20250501) | -| `INSIGHT_MODEL` | No | Model for insight synthesis (default: claude-sonnet-4-6-20250514) | +| `EXTRACTION_MODEL` | No | Model for extraction (default: claude-haiku-4-5-20251001) | +| `INSIGHT_MODEL` | No | Model for insight synthesis (default: claude-sonnet-4-6) | | `COMPACT_RESPONSES` | No | Token-efficient responses (default: true) | | `CHUNKING_MODE` | No | 
`fixed` (default) or `semantic` (embedding-based splits) | | `SEMANTIC_SIMILARITY_THRESHOLD` | No | Semantic split sensitivity 0–1 (default: 0.5) | @@ -234,7 +234,7 @@ Enable with `ENABLE_CONVERSATIONS=true` (default). Requires running one of the c - `scripts/setup-db-conversation.sql` (OpenAI embeddings, 1536d) - `scripts/setup-db-conversation-ollama.sql` (Ollama v1 - nomic-embed-text, 1024d) - `scripts/setup-db-conversation-ollama-v2.sql` (Ollama v2 - nomic-embed-text-v2-moe, 768d) -- `scripts/setup-db-conversation-google.sql` (Google AI - text-embedding-004, 768d) +- `scripts/setup-db-conversation-google.sql` (Google AI - gemini-embedding-2-preview, 3072d) | Tool | Description | |------|-------------| @@ -395,7 +395,7 @@ OLLAMA_MODEL=nomic-embed-text **Supported Ollama models:** `nomic-embed-text` (1024d), `nomic-embed-text-v2-moe` (768d, recommended for new installs), `mxbai-embed-large` (1024d) -> **Note:** Each provider uses different embedding dimensions: OpenAI 1536d, Ollama 1024d (or 768d for v2-moe), Google AI 768d. Use the matching schema: `setup-db.sql` (OpenAI), `setup-db-ollama.sql` (Ollama 1024d), `setup-db-ollama-v2.sql` (Ollama 768d), or `setup-db-google.sql` (Google AI). You cannot mix providers without re-embedding all documents. +> **Note:** Each provider uses different embedding dimensions: OpenAI 1536d, Ollama 1024d (or 768d for v2-moe), Google AI 3072d. Use the matching schema: `setup-db.sql` (OpenAI), `setup-db-ollama.sql` (Ollama 1024d), `setup-db-ollama-v2.sql` (Ollama 768d), or `setup-db-google.sql` (Google AI). You cannot mix providers without re-embedding all documents. 
## Troubleshooting diff --git a/scripts/setup-db-conversation-google.sql b/scripts/setup-db-conversation-google.sql index 227597b..5781689 100644 --- a/scripts/setup-db-conversation-google.sql +++ b/scripts/setup-db-conversation-google.sql @@ -1,10 +1,55 @@ -- Textrawl Conversation Memory Schema (Google AI Version) --- Use this when using Google AI embeddings with 768 dimensions (text-embedding-004) +-- Use this when using Google AI embeddings with 3072 dimensions (gemini-embedding-2-preview) -- For OpenAI users: use setup-db-conversation.sql -- For Google AI users: use this file -- For Ollama v1 users: use setup-db-conversation-ollama.sql -- Run this in Supabase SQL Editor after setting up the base schema and memory schema +-- ============================================ +-- Migration: resize embedding columns to VECTOR(3072) +-- ============================================ +-- Existing installs (e.g. from text-embedding-004 with VECTOR(768), or any other dimension) +-- won't have their embedding columns updated by the CREATE TABLE IF NOT EXISTS below. +-- This block detects a mismatched dimension and recreates the columns. +-- NOTE: Switching embedding models means the old vectors are in a different vector space +-- and cannot be meaningfully resized. Old embeddings are dropped; re-embedding is required. +DO $$ +BEGIN + IF EXISTS ( + SELECT 1 FROM pg_attribute a + JOIN pg_class c ON c.oid = a.attrelid + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = 'public' + AND c.relname = 'conversation_sessions' + AND a.attname = 'summary_embedding' + AND a.atttypmod <> 3072 + AND a.attnum > 0 + AND NOT a.attisdropped + ) THEN + DROP INDEX IF EXISTS conversation_sessions_embedding_idx; + ALTER TABLE conversation_sessions DROP COLUMN summary_embedding; + ALTER TABLE conversation_sessions ADD COLUMN summary_embedding VECTOR(3072); + RAISE NOTICE 'Resized conversation_sessions.summary_embedding to VECTOR(3072). 
Re-embedding required.'; + END IF; + + IF EXISTS ( + SELECT 1 FROM pg_attribute a + JOIN pg_class c ON c.oid = a.attrelid + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = 'public' + AND c.relname = 'conversation_turns' + AND a.attname = 'embedding' + AND a.atttypmod <> 3072 + AND a.attnum > 0 + AND NOT a.attisdropped + ) THEN + DROP INDEX IF EXISTS conversation_turns_embedding_idx; + ALTER TABLE conversation_turns DROP COLUMN embedding; + ALTER TABLE conversation_turns ADD COLUMN embedding VECTOR(3072); + RAISE NOTICE 'Resized conversation_turns.embedding to VECTOR(3072). Re-embedding required.'; + END IF; +END $$; + -- ============================================ -- Conversation Sessions -- ============================================ @@ -13,7 +58,7 @@ CREATE TABLE IF NOT EXISTS conversation_sessions ( session_key TEXT UNIQUE, title TEXT, summary TEXT, - summary_embedding VECTOR(768), + summary_embedding VECTOR(3072), metadata JSONB DEFAULT '{}', turn_count INTEGER DEFAULT 0, last_activity TIMESTAMPTZ DEFAULT NOW(), @@ -28,7 +73,7 @@ CREATE TABLE IF NOT EXISTS conversation_turns ( session_id UUID NOT NULL REFERENCES conversation_sessions(id) ON DELETE CASCADE, role TEXT NOT NULL CHECK (role IN ('user', 'assistant', 'system')), content TEXT NOT NULL, - embedding VECTOR(768), + embedding VECTOR(3072), turn_index INTEGER NOT NULL, token_count INTEGER, metadata JSONB DEFAULT '{}', @@ -102,14 +147,14 @@ CREATE TRIGGER conversation_turns_delete_activity FOR EACH ROW EXECUTE FUNCTION update_session_activity_on_delete(); -- ============================================ --- Search Functions (768 dimensions) +-- Search Functions (3072 dimensions) -- ============================================ -DROP FUNCTION IF EXISTS conversation_semantic_search(VECTOR(768), INT); -DROP FUNCTION IF EXISTS conversation_hybrid_search(TEXT, VECTOR(768), INT, FLOAT, FLOAT, INT); -DROP FUNCTION IF EXISTS conversation_turn_search(TEXT, VECTOR(768), INT, FLOAT, FLOAT, INT, 
UUID); +DROP FUNCTION IF EXISTS conversation_semantic_search(VECTOR(3072), INT); +DROP FUNCTION IF EXISTS conversation_hybrid_search(TEXT, VECTOR(3072), INT, FLOAT, FLOAT, INT); +DROP FUNCTION IF EXISTS conversation_turn_search(TEXT, VECTOR(3072), INT, FLOAT, FLOAT, INT, UUID); CREATE OR REPLACE FUNCTION public.conversation_semantic_search( - query_embedding VECTOR(768), + query_embedding VECTOR(3072), match_count INT DEFAULT 10 ) RETURNS TABLE ( @@ -140,7 +185,7 @@ $$; CREATE OR REPLACE FUNCTION public.conversation_hybrid_search( query_text TEXT, - query_embedding VECTOR(768), + query_embedding VECTOR(3072), match_count INT DEFAULT 10, full_text_weight FLOAT DEFAULT 1.0, semantic_weight FLOAT DEFAULT 1.0, @@ -183,7 +228,7 @@ $$; CREATE OR REPLACE FUNCTION public.conversation_turn_search( query_text TEXT, - query_embedding VECTOR(768), + query_embedding VECTOR(3072), match_count INT DEFAULT 20, full_text_weight FLOAT DEFAULT 1.0, semantic_weight FLOAT DEFAULT 1.0, diff --git a/scripts/setup-db-google.sql b/scripts/setup-db-google.sql index a271c31..12af9c6 100644 --- a/scripts/setup-db-google.sql +++ b/scripts/setup-db-google.sql @@ -1,7 +1,22 @@ -- Textrawl Database Schema (Google AI Version) --- Use this when using text-embedding-004 (768 dimensions) +-- Use this when using gemini-embedding-2-preview (3072 dimensions) -- Run this in Supabase SQL Editor after creating your project -- IMPORTANT: After running this schema, run scripts/security-rls.sql to enable Row Level Security +-- +-- BREAKING CHANGE from text-embedding-004 (vector(768)): +-- Switching to gemini-embedding-2-preview changes embedding dimensions from 768 to 3072. +-- These are different vector spaces; old embeddings cannot be resized or reused. +-- +-- Option A — fresh database: run this file as-is, then security-rls.sql. +-- Option B — existing database: drop the embedding column, recreate it as vector(3072), +-- then trigger re-embedding for all documents. 
Example: +-- DROP INDEX IF EXISTS chunks_embedding_idx; +-- ALTER TABLE chunks DROP COLUMN embedding; +-- ALTER TABLE chunks ADD COLUMN embedding vector(3072); +-- Then recreate the vector index, re-run security-rls.sql, and re-upload/re-embed all documents. +-- +-- Verify EMBEDDING_PROVIDER=google and GOOGLE_EMBEDDING_MODEL=gemini-embedding-2-preview +-- are set before applying this schema. NOTE: pgvector ivfflat/hnsw indexes support at most 2000 dimensions, so a plain ANN index on vector(3072) will fail to build — use a halfvec expression index or omit the ANN index. -- Enable required extensions create extension if not exists vector with schema extensions; @@ -24,7 +39,7 @@ create table if not exists documents ( updated_at timestamptz default now() ); --- Chunks table with embeddings (768 dimensions for text-embedding-004) +-- Chunks table with embeddings (3072 dimensions for gemini-embedding-2-preview) create table if not exists chunks ( id uuid primary key default gen_random_uuid(), document_id uuid not null references documents(id) on delete cascade, @@ -32,7 +47,7 @@ create table if not exists chunks ( chunk_index integer not null, start_offset integer, end_offset integer, - embedding vector(768), -- text-embedding-004 dimension (Matryoshka: supports 768, 512, 256) + embedding vector(3072), -- gemini-embedding-2-preview dimension (Matryoshka: supports 3072, 1536, 768) metadata jsonb default '{}', created_at timestamptz default now() ); @@ -52,7 +67,7 @@ create index if not exists chunks_embedding_idx on chunks -- Hybrid search function using Reciprocal Rank Fusion (RRF) create or replace function public.hybrid_search( query_text text, - query_embedding vector(768), + query_embedding vector(3072), match_count int default 10, full_text_weight float default 1.0, semantic_weight float default 1.0, @@ -114,7 +129,7 @@ $$; -- Semantic-only search function (when full-text query is empty) create or replace function public.semantic_search( - query_embedding vector(768), + query_embedding vector(3072), match_count int default 10 ) returns table ( diff --git a/scripts/setup-db-insights-google.sql b/scripts/setup-db-insights-google.sql index dcf84d6..c74c632 100644 --- 
a/scripts/setup-db-insights-google.sql +++ b/scripts/setup-db-insights-google.sql @@ -40,7 +40,7 @@ CREATE TABLE IF NOT EXISTS proactive_insights ( summary TEXT NOT NULL, evidence JSONB NOT NULL DEFAULT '[]', -- array of {chunkId, documentId, content, score} entities JSONB DEFAULT '[]', -- related entity names - embedding vector(768), -- for semantic retrieval (Google text-embedding-004 768d) + embedding vector(3072), -- for semantic retrieval (Google gemini-embedding-2-preview 3072d) batch_id UUID, -- groups insights from the same scan status TEXT NOT NULL DEFAULT 'new' CHECK (status IN ('new', 'seen', 'dismissed')), created_at TIMESTAMPTZ DEFAULT now() @@ -101,7 +101,7 @@ $$; -- 5. Semantic search over insights -- --------------------------------------------------------------------------- CREATE OR REPLACE FUNCTION public.insight_semantic_search( - query_embedding vector(768), + query_embedding vector(3072), match_count INTEGER DEFAULT 10, status_filter TEXT DEFAULT NULL ) diff --git a/src/services/audio-processor.ts b/src/services/audio-processor.ts index 9c6d81a..6af51b8 100644 --- a/src/services/audio-processor.ts +++ b/src/services/audio-processor.ts @@ -47,7 +47,7 @@ export async function transcribeAudio(buffer: Buffer, mimeType: string): Promise await writeFile(tempPath, buffer); const transcription = await client.audio.transcriptions.create({ - model: 'whisper-1', + model: 'gpt-4o-mini-transcribe', file: createReadStream(tempPath), }); diff --git a/src/services/embeddings.ts b/src/services/embeddings.ts index 7f7d7ce..7e7e62f 100644 --- a/src/services/embeddings.ts +++ b/src/services/embeddings.ts @@ -72,9 +72,9 @@ function getOllamaDimensions(model: string): number { } // Google AI constants -const GOOGLE_DIMENSIONS = 768; +const GOOGLE_DIMENSIONS = 3072; const GOOGLE_MAX_BATCH_SIZE = 100; -const GOOGLE_MAX_INPUT_CHARS = 10_000; // ~2500 tokens conservative limit +const GOOGLE_MAX_INPUT_CHARS = 30_000; // gemini-embedding-2-preview: 8192 token context (~4 
chars/token) // Ollama API response type interface OllamaEmbedResponse { diff --git a/src/utils/config.ts b/src/utils/config.ts index 0b8f0c3..12487f9 100644 --- a/src/utils/config.ts +++ b/src/utils/config.ts @@ -35,7 +35,7 @@ const envSchema = z.object({ // Google AI GOOGLE_AI_API_KEY: z.string().optional(), - GOOGLE_EMBEDDING_MODEL: z.string().default('text-embedding-004'), + GOOGLE_EMBEDDING_MODEL: z.string().default('gemini-embedding-2-preview'), // Ollama OLLAMA_BASE_URL: z.string().url().default('http://localhost:11434'), @@ -67,10 +67,10 @@ const envSchema = z.object({ ANTHROPIC_API_KEY: z.string().startsWith('sk-ant-').optional(), // Model for memory extraction (fast, cheap model recommended) - EXTRACTION_MODEL: z.string().default('claude-haiku-4-5-20250501'), + EXTRACTION_MODEL: z.string().default('claude-haiku-4-5-20251001'), // Model for insight synthesis (benefits from more capable reasoning) - INSIGHT_MODEL: z.string().default('claude-sonnet-4-6-20250514'), + INSIGHT_MODEL: z.string().default('claude-sonnet-4-6'), // Response format - compact saves 40-60% tokens but uses short keys COMPACT_RESPONSES: z