Merged
10 changes: 5 additions & 5 deletions .env.example
Original file line number Diff line number Diff line change
@@ -40,9 +40,9 @@ OLLAMA_MODEL=nomic-embed-text

# Google AI (required if EMBEDDING_PROVIDER=google)
# Get from: https://aistudio.google.com/app/apikey
# Model: text-embedding-004 (768 dimensions) → setup-db-google.sql
# Model: gemini-embedding-2-preview (3072 dimensions) → setup-db-google.sql
# GOOGLE_AI_API_KEY=your-google-ai-api-key
# GOOGLE_EMBEDDING_MODEL=text-embedding-004
# GOOGLE_EMBEDDING_MODEL=gemini-embedding-2-preview

# OAuth (optional - enables OAuth 2.0 for Claude/ChatGPT mobile apps)
# All four required vars must be set together to enable OAuth. Bearer token auth still works alongside.
@@ -75,11 +75,11 @@ ENABLE_MEMORY_EXTRACTION=false
ANTHROPIC_API_KEY=sk-ant-your-anthropic-api-key

# Extraction Model (fast, cheap model recommended)
# Options: claude-haiku-4-5-20250501, claude-sonnet-4-6-20250514, etc.
EXTRACTION_MODEL=claude-haiku-4-5-20250501
# Options: claude-haiku-4-5-20251001, claude-sonnet-4-6, etc.
EXTRACTION_MODEL=claude-haiku-4-5-20251001

# Insight Model (used for insight synthesis, benefits from capable reasoning)
# INSIGHT_MODEL=claude-sonnet-4-6-20250514
# INSIGHT_MODEL=claude-sonnet-4-6

# Response Format
# true: Compact responses (40-60% fewer tokens, short keys like "n", "t", "o")
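The compact response format mentioned in `.env.example` can be pictured as a key-shortening pass before serialization. This is a hypothetical sketch: only the keys "n", "t", "o" are documented in the comment above, so the mapping table and function name here are illustrative assumptions, not Textrawl's actual code.

```typescript
// Hypothetical sketch of the compact-response idea: map verbose JSON keys to
// short ones before serializing. Only "n", "t", "o" appear in the docs; the
// long-key names they abbreviate are assumptions.
const KEY_MAP: Record<string, string> = {
  name: 'n',
  type: 't',
  observations: 'o',
};

function compact(obj: Record<string, unknown>): Record<string, unknown> {
  const out: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(obj)) {
    // Unmapped keys pass through unchanged.
    out[KEY_MAP[key] ?? key] = value;
  }
  return out;
}
```

Serialized with short keys, a payload like `{"name":"Ada","type":"person"}` becomes `{"n":"Ada","t":"person"}`, which is where the 40-60% token savings mainly come from.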
4 changes: 2 additions & 2 deletions CLAUDE.md
@@ -34,7 +34,7 @@ Claude Code reference for Textrawl.

- **Knowledge search/documents**: one of
- OpenAI 1536d: `scripts/setup-db.sql`
- Google AI 768d: `scripts/setup-db-google.sql`
- Google AI 3072d: `scripts/setup-db-google.sql`
- Ollama 1024d: `scripts/setup-db-ollama.sql`
- Ollama 768d: `scripts/setup-db-ollama-v2.sql`
- **Memory graph**:
@@ -46,7 +46,7 @@ Claude Code reference for Textrawl.
- Ollama 768d: `scripts/setup-db-conversation-ollama-v2.sql`
- **Insights**:
- OpenAI: `scripts/setup-db-insights.sql`
- Google AI 768d: `scripts/setup-db-insights-google.sql`
- Google AI 3072d: `scripts/setup-db-insights-google.sql`
- Ollama 1024d: `scripts/setup-db-insights-ollama.sql`
- Ollama 768d: `scripts/setup-db-insights-ollama-v2.sql`
- **Security hardening**: `scripts/security-rls.sql` (+ `scripts/security-rls-memory.sql`)
12 changes: 6 additions & 6 deletions README.md
@@ -176,8 +176,8 @@ pnpm upload -- ./converted/
| `OPENAI_API_KEY` | If OpenAI | For text-embedding-3-small (1536d) |
| `OLLAMA_BASE_URL` | If Ollama | Default: `http://localhost:11434` |
| `OLLAMA_MODEL` | If Ollama | Default: `nomic-embed-text` |
| `GOOGLE_AI_API_KEY` | If Google | For text-embedding-004 (768d) |
| `GOOGLE_EMBEDDING_MODEL` | If Google | Default: `text-embedding-004` |
| `GOOGLE_AI_API_KEY` | If Google | For gemini-embedding-2-preview (3072d) |
| `GOOGLE_EMBEDDING_MODEL` | If Google | Default: `gemini-embedding-2-preview` |
| `API_BEARER_TOKEN` | Prod only | Min 32 chars (`openssl rand -hex 32`) |
| `PORT` | No | Default: 3000 |
| `LOG_LEVEL` | No | debug, info, warn, error |
@@ -187,8 +187,8 @@ pnpm upload -- ./converted/
| `ENABLE_INSIGHTS` | No | Enable proactive insight tools (default: true) |
| `ENABLE_MEMORY_EXTRACTION` | No | Enable LLM-based memory extraction (default: false) |
| `ANTHROPIC_API_KEY` | If extraction | Required for `extract_memories` tool |
| `EXTRACTION_MODEL` | No | Model for extraction (default: claude-haiku-4-5-20250501) |
| `INSIGHT_MODEL` | No | Model for insight synthesis (default: claude-sonnet-4-6-20250514) |
| `EXTRACTION_MODEL` | No | Model for extraction (default: claude-haiku-4-5-20251001) |
| `INSIGHT_MODEL` | No | Model for insight synthesis (default: claude-sonnet-4-6) |
| `COMPACT_RESPONSES` | No | Token-efficient responses (default: true) |
| `CHUNKING_MODE` | No | `fixed` (default) or `semantic` (embedding-based splits) |
| `SEMANTIC_SIMILARITY_THRESHOLD` | No | Semantic split sensitivity 0–1 (default: 0.5) |
@@ -234,7 +234,7 @@ Enable with `ENABLE_CONVERSATIONS=true` (default). Requires running one of the c
- `scripts/setup-db-conversation.sql` (OpenAI embeddings, 1536d)
- `scripts/setup-db-conversation-ollama.sql` (Ollama v1 - nomic-embed-text, 1024d)
- `scripts/setup-db-conversation-ollama-v2.sql` (Ollama v2 - nomic-embed-text-v2-moe, 768d)
- `scripts/setup-db-conversation-google.sql` (Google AI - text-embedding-004, 768d)
- `scripts/setup-db-conversation-google.sql` (Google AI - gemini-embedding-2-preview, 3072d)

| Tool | Description |
|------|-------------|
@@ -395,7 +395,7 @@ OLLAMA_MODEL=nomic-embed-text

**Supported Ollama models:** `nomic-embed-text` (1024d), `nomic-embed-text-v2-moe` (768d, recommended for new installs), `mxbai-embed-large` (1024d)

> **Note:** Each provider uses different embedding dimensions: OpenAI 1536d, Ollama 1024d (or 768d for v2-moe), Google AI 768d. Use the matching schema: `setup-db.sql` (OpenAI), `setup-db-ollama.sql` (Ollama 1024d), `setup-db-ollama-v2.sql` (Ollama 768d), or `setup-db-google.sql` (Google AI). You cannot mix providers without re-embedding all documents.
> **Note:** Each provider uses different embedding dimensions: OpenAI 1536d, Ollama 1024d (or 768d for v2-moe), Google AI 3072d. Use the matching schema: `setup-db.sql` (OpenAI), `setup-db-ollama.sql` (Ollama 1024d), `setup-db-ollama-v2.sql` (Ollama 768d), or `setup-db-google.sql` (Google AI). You cannot mix providers without re-embedding all documents.

## Troubleshooting

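The provider-to-dimension pairing in the note above can be captured as a small lookup table. The sketch below is documentation in code form, not Textrawl's actual implementation: the file names come straight from the README, but the map shape and the startup check are assumptions. Failing fast at boot beats discovering a dimension mismatch as opaque pgvector errors at query time.

```typescript
// Provider/model → embedding dimension and matching schema script, as listed
// in the README. The lookup keys are an assumed naming convention.
interface EmbeddingTarget {
  dimensions: number;
  schema: string;
}

const EMBEDDING_TARGETS: Record<string, EmbeddingTarget> = {
  'openai/text-embedding-3-small': { dimensions: 1536, schema: 'scripts/setup-db.sql' },
  'ollama/nomic-embed-text': { dimensions: 1024, schema: 'scripts/setup-db-ollama.sql' },
  'ollama/nomic-embed-text-v2-moe': { dimensions: 768, schema: 'scripts/setup-db-ollama-v2.sql' },
  'google/gemini-embedding-2-preview': { dimensions: 3072, schema: 'scripts/setup-db-google.sql' },
};

// Sketch of a startup guard: compare the dimension declared in the database
// against what the configured provider will produce.
function assertDimensions(providerModel: string, dbDimensions: number): void {
  const target = EMBEDDING_TARGETS[providerModel];
  if (target && target.dimensions !== dbDimensions) {
    throw new Error(
      `Embedding dimension mismatch for ${providerModel}: DB has ${dbDimensions}, ` +
      `expected ${target.dimensions}. Run ${target.schema} and re-embed all documents.`,
    );
  }
}
```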
65 changes: 55 additions & 10 deletions scripts/setup-db-conversation-google.sql
@@ -1,10 +1,55 @@
-- Textrawl Conversation Memory Schema (Google AI Version)
-- Use this when using Google AI embeddings with 768 dimensions (text-embedding-004)
-- Use this when using Google AI embeddings with 3072 dimensions (gemini-embedding-2-preview)
-- For OpenAI users: use setup-db-conversation.sql
-- For Google AI users: use this file
-- For Ollama v1 users: use setup-db-conversation-ollama.sql
-- Run this in Supabase SQL Editor after setting up the base schema and memory schema

-- ============================================
-- Migration: resize embedding columns to VECTOR(3072)
-- ============================================
-- Existing installs (e.g. from text-embedding-004 with VECTOR(768), or any other dimension)
-- won't have their embedding columns updated by the CREATE TABLE IF NOT EXISTS below.
-- This block detects a mismatched dimension and recreates the columns.
-- NOTE: Switching embedding models means the old vectors are in a different vector space
-- and cannot be meaningfully resized. Old embeddings are dropped; re-embedding is required.
DO $$
BEGIN
IF EXISTS (
SELECT 1 FROM pg_attribute a
JOIN pg_class c ON c.oid = a.attrelid
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE n.nspname = 'public'
AND c.relname = 'conversation_sessions'
AND a.attname = 'summary_embedding'
AND a.atttypmod <> 3072
AND a.attnum > 0
AND NOT a.attisdropped
) THEN
DROP INDEX IF EXISTS conversation_sessions_embedding_idx;
ALTER TABLE conversation_sessions DROP COLUMN summary_embedding;
ALTER TABLE conversation_sessions ADD COLUMN summary_embedding VECTOR(3072);
RAISE NOTICE 'Resized conversation_sessions.summary_embedding to VECTOR(3072). Re-embedding required.';
END IF;

IF EXISTS (
SELECT 1 FROM pg_attribute a
JOIN pg_class c ON c.oid = a.attrelid
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE n.nspname = 'public'
AND c.relname = 'conversation_turns'
AND a.attname = 'embedding'
AND a.atttypmod <> 3072
AND a.attnum > 0
AND NOT a.attisdropped
) THEN
DROP INDEX IF EXISTS conversation_turns_embedding_idx;
ALTER TABLE conversation_turns DROP COLUMN embedding;
ALTER TABLE conversation_turns ADD COLUMN embedding VECTOR(3072);
RAISE NOTICE 'Resized conversation_turns.embedding to VECTOR(3072). Re-embedding required.';
END IF;
END $$;
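Before applying this migration, an existing install's current dimension can be checked with the same `pg_attribute` lookup the DO block relies on (for pgvector columns, `atttypmod` holds the declared dimension). This is a read-only diagnostic query, safe to run in the Supabase SQL Editor:

```sql
-- Diagnostic only: report the declared dimension of each embedding column.
-- A result of 3072 means the column is already at the new size.
SELECT c.relname   AS table_name,
       a.attname   AS column_name,
       a.atttypmod AS declared_dimensions
FROM pg_attribute a
JOIN pg_class c ON c.oid = a.attrelid
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE n.nspname = 'public'
  AND c.relname IN ('conversation_sessions', 'conversation_turns')
  AND a.attname IN ('summary_embedding', 'embedding')
  AND a.attnum > 0
  AND NOT a.attisdropped;
```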

-- ============================================
-- Conversation Sessions
-- ============================================
@@ -13,7 +58,7 @@ CREATE TABLE IF NOT EXISTS conversation_sessions (
session_key TEXT UNIQUE,
title TEXT,
summary TEXT,
summary_embedding VECTOR(768),
summary_embedding VECTOR(3072),
metadata JSONB DEFAULT '{}',
turn_count INTEGER DEFAULT 0,
last_activity TIMESTAMPTZ DEFAULT NOW(),
@@ -28,7 +73,7 @@ CREATE TABLE IF NOT EXISTS conversation_turns (
session_id UUID NOT NULL REFERENCES conversation_sessions(id) ON DELETE CASCADE,
role TEXT NOT NULL CHECK (role IN ('user', 'assistant', 'system')),
content TEXT NOT NULL,
embedding VECTOR(768),
embedding VECTOR(3072),
turn_index INTEGER NOT NULL,
token_count INTEGER,
metadata JSONB DEFAULT '{}',
@@ -102,14 +147,14 @@ CREATE TRIGGER conversation_turns_delete_activity
FOR EACH ROW EXECUTE FUNCTION update_session_activity_on_delete();

-- ============================================
-- Search Functions (768 dimensions)
-- Search Functions (3072 dimensions)
-- ============================================
DROP FUNCTION IF EXISTS conversation_semantic_search(VECTOR(768), INT);
DROP FUNCTION IF EXISTS conversation_hybrid_search(TEXT, VECTOR(768), INT, FLOAT, FLOAT, INT);
DROP FUNCTION IF EXISTS conversation_turn_search(TEXT, VECTOR(768), INT, FLOAT, FLOAT, INT, UUID);
DROP FUNCTION IF EXISTS conversation_semantic_search(VECTOR(3072), INT);
DROP FUNCTION IF EXISTS conversation_hybrid_search(TEXT, VECTOR(3072), INT, FLOAT, FLOAT, INT);
DROP FUNCTION IF EXISTS conversation_turn_search(TEXT, VECTOR(3072), INT, FLOAT, FLOAT, INT, UUID);

CREATE OR REPLACE FUNCTION public.conversation_semantic_search(
query_embedding VECTOR(768),
query_embedding VECTOR(3072),
match_count INT DEFAULT 10
)
RETURNS TABLE (
@@ -140,7 +185,7 @@ $$;

CREATE OR REPLACE FUNCTION public.conversation_hybrid_search(
query_text TEXT,
query_embedding VECTOR(768),
query_embedding VECTOR(3072),
match_count INT DEFAULT 10,
full_text_weight FLOAT DEFAULT 1.0,
semantic_weight FLOAT DEFAULT 1.0,
@@ -183,7 +228,7 @@ $$;

CREATE OR REPLACE FUNCTION public.conversation_turn_search(
query_text TEXT,
query_embedding VECTOR(768),
query_embedding VECTOR(3072),
match_count INT DEFAULT 20,
full_text_weight FLOAT DEFAULT 1.0,
semantic_weight FLOAT DEFAULT 1.0,
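On the caller side, the search functions defined in this script take a `query_embedding` whose dimension must match the column. A hypothetical guard like the one below (the function name and wiring are assumptions; only the RPC names and SQL signatures come from this script) catches a stale 768-d embedder before the database rejects the call:

```typescript
// Hypothetical caller-side sketch: validate the query embedding's dimension
// before invoking the 3072-d search functions defined above. The argument
// names mirror the SQL signatures; the helper itself is illustrative.
const EXPECTED_DIMENSIONS = 3072;

function buildSemanticSearchArgs(embedding: number[], matchCount = 10) {
  if (embedding.length !== EXPECTED_DIMENSIONS) {
    throw new Error(
      `query_embedding has ${embedding.length} dimensions; expected ${EXPECTED_DIMENSIONS}`,
    );
  }
  // Pass the result as RPC arguments, e.g. with supabase-js:
  //   supabase.rpc('conversation_semantic_search', buildSemanticSearchArgs(vec));
  return { query_embedding: embedding, match_count: matchCount };
}
```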
25 changes: 20 additions & 5 deletions scripts/setup-db-google.sql
@@ -1,7 +1,22 @@
-- Textrawl Database Schema (Google AI Version)
-- Use this when using text-embedding-004 (768 dimensions)
-- Use this when using gemini-embedding-2-preview (3072 dimensions)
-- Run this in Supabase SQL Editor after creating your project
-- IMPORTANT: After running this schema, run scripts/security-rls.sql to enable Row Level Security
--
-- BREAKING CHANGE from text-embedding-004 (vector(768)):
-- Switching to gemini-embedding-2-preview changes embedding dimensions from 768 to 3072.
-- These are different vector spaces; old embeddings cannot be resized or reused.
--
-- Option A — fresh database: run this file as-is, then security-rls.sql.
-- Option B — existing database: drop the embedding column, recreate it as vector(3072),
-- then trigger re-embedding for all documents. Example:
-- DROP INDEX IF EXISTS chunks_embedding_idx;
-- ALTER TABLE chunks DROP COLUMN embedding;
-- ALTER TABLE chunks ADD COLUMN embedding vector(3072);
-- Then re-run security-rls.sql and re-upload/re-embed all documents.
--
-- Verify EMBEDDING_PROVIDER=google and GOOGLE_EMBEDDING_MODEL=gemini-embedding-2-preview
-- are set before applying this schema.

-- Enable required extensions
create extension if not exists vector with schema extensions;
@@ -24,15 +39,15 @@ create table if not exists documents (
updated_at timestamptz default now()
);

-- Chunks table with embeddings (768 dimensions for text-embedding-004)
-- Chunks table with embeddings (3072 dimensions for gemini-embedding-2-preview)
create table if not exists chunks (
id uuid primary key default gen_random_uuid(),
document_id uuid not null references documents(id) on delete cascade,
content text not null,
chunk_index integer not null,
start_offset integer,
end_offset integer,
embedding vector(768), -- text-embedding-004 dimension (Matryoshka: supports 768, 512, 256)
embedding vector(3072), -- gemini-embedding-2-preview dimension (Matryoshka: supports 3072, 1536, 768)
metadata jsonb default '{}',
created_at timestamptz default now()
);
@@ -52,7 +67,7 @@ create index if not exists chunks_embedding_idx on chunks
-- Hybrid search function using Reciprocal Rank Fusion (RRF)
create or replace function public.hybrid_search(
query_text text,
query_embedding vector(768),
query_embedding vector(3072),
match_count int default 10,
full_text_weight float default 1.0,
semantic_weight float default 1.0,
@@ -114,7 +129,7 @@ $$;

-- Semantic-only search function (when full-text query is empty)
create or replace function public.semantic_search(
query_embedding vector(768),
query_embedding vector(3072),
match_count int default 10
)
returns table (
4 changes: 2 additions & 2 deletions scripts/setup-db-insights-google.sql
@@ -40,7 +40,7 @@ CREATE TABLE IF NOT EXISTS proactive_insights (
summary TEXT NOT NULL,
evidence JSONB NOT NULL DEFAULT '[]', -- array of {chunkId, documentId, content, score}
entities JSONB DEFAULT '[]', -- related entity names
embedding vector(768), -- for semantic retrieval (Google text-embedding-004 768d)
embedding vector(3072), -- for semantic retrieval (Google gemini-embedding-2-preview 3072d)
batch_id UUID, -- groups insights from the same scan
status TEXT NOT NULL DEFAULT 'new' CHECK (status IN ('new', 'seen', 'dismissed')),
created_at TIMESTAMPTZ DEFAULT now()
@@ -101,7 +101,7 @@ $$;
-- 5. Semantic search over insights
-- ---------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION public.insight_semantic_search(
query_embedding vector(768),
query_embedding vector(3072),
match_count INTEGER DEFAULT 10,
status_filter TEXT DEFAULT NULL
)
2 changes: 1 addition & 1 deletion src/services/audio-processor.ts
@@ -47,7 +47,7 @@ export async function transcribeAudio(buffer: Buffer, mimeType: string): Promise
await writeFile(tempPath, buffer);

const transcription = await client.audio.transcriptions.create({
model: 'whisper-1',
model: 'gpt-4o-mini-transcribe',
file: createReadStream(tempPath),
});

4 changes: 2 additions & 2 deletions src/services/embeddings.ts
@@ -72,9 +72,9 @@ function getOllamaDimensions(model: string): number {
}

// Google AI constants
const GOOGLE_DIMENSIONS = 768;
const GOOGLE_DIMENSIONS = 3072;
const GOOGLE_MAX_BATCH_SIZE = 100;
const GOOGLE_MAX_INPUT_CHARS = 10_000; // ~2500 tokens conservative limit
const GOOGLE_MAX_INPUT_CHARS = 30_000; // gemini-embedding-2-preview: 8192 token context (~4 chars/token)

// Ollama API response type
interface OllamaEmbedResponse {
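The new `GOOGLE_MAX_INPUT_CHARS` value follows from the model's context window: 8192 tokens at roughly 4 characters per token is about 32k characters, capped conservatively at 30k. A guard in that spirit might look like this; the helper name is illustrative, not the project's actual function:

```typescript
// Sketch of the input-size guard implied by the constant above:
// 8192 tokens x ~4 chars/token ~= 32k chars, capped conservatively at 30k.
const GOOGLE_MAX_INPUT_CHARS = 30_000;

function clampForEmbedding(text: string): string {
  if (text.length <= GOOGLE_MAX_INPUT_CHARS) return text;
  // Truncate rather than fail: anything beyond the model's context window
  // would not influence the embedding anyway.
  return text.slice(0, GOOGLE_MAX_INPUT_CHARS);
}
```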
6 changes: 3 additions & 3 deletions src/utils/config.ts
@@ -35,7 +35,7 @@ const envSchema = z.object({

// Google AI
GOOGLE_AI_API_KEY: z.string().optional(),
GOOGLE_EMBEDDING_MODEL: z.string().default('text-embedding-004'),
GOOGLE_EMBEDDING_MODEL: z.string().default('gemini-embedding-2-preview'),

// Ollama
OLLAMA_BASE_URL: z.string().url().default('http://localhost:11434'),
@@ -67,10 +67,10 @@ const envSchema = z.object({
ANTHROPIC_API_KEY: z.string().startsWith('sk-ant-').optional(),

// Model for memory extraction (fast, cheap model recommended)
EXTRACTION_MODEL: z.string().default('claude-haiku-4-5-20250501'),
EXTRACTION_MODEL: z.string().default('claude-haiku-4-5-20251001'),

// Model for insight synthesis (benefits from more capable reasoning)
INSIGHT_MODEL: z.string().default('claude-sonnet-4-6-20250514'),
INSIGHT_MODEL: z.string().default('claude-sonnet-4-6'),

// Response format - compact saves 40-60% tokens but uses short keys
COMPACT_RESPONSES: z