diff --git a/.gitignore b/.gitignore index 724f7bc..254b1e2 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,6 @@ next-env.d.ts # Extension config extension/config.js +.env +.env.local +aiarchives.log diff --git a/aiarchives.log b/aiarchives.log new file mode 100644 index 0000000..4f4422a Binary files /dev/null and b/aiarchives.log differ
diff --git a/app/api/search/route.ts b/app/api/search/route.ts
new file mode 100644
index 0000000..81a96ec
--- /dev/null
+++ b/app/api/search/route.ts
@@ -0,0 +1,130 @@
+import { NextRequest, NextResponse } from 'next/server';
+import { generateEmbedding } from '@/lib/services/embeddings';
+import { searchBySimilarity } from '@/lib/db/embeddings';
+import { dbClient } from '@/lib/db/client';
+import { s3Client } from '@/lib/storage/s3';
+import { loadConfig } from '@/lib/config';
+
+/** Number of results returned when `limit` is absent or empty. */
+const DEFAULT_LIMIT = 10;
+
+/** Largest `limit` honoured; larger requests are clamped to this value. */
+const MAX_LIMIT = 100;
+
+/** CORS header sent on every GET response so browser callers can also read error bodies. */
+const CORS_HEADERS = { 'Access-Control-Allow-Origin': '*' };
+
+let isInitialized = false;
+
+/**
+ * Initializes the database and S3 clients the first time a request arrives.
+ *
+ * An error whose message contains "already initialized" is treated as
+ * success; any other initialization error is rethrown.
+ */
+async function ensureInitialized(): Promise<void> {
+  if (isInitialized) {
+    return;
+  }
+
+  try {
+    const config = loadConfig();
+    await dbClient.initialize(config.database);
+    s3Client.initialize(config.s3);
+    isInitialized = true;
+  } catch (error) {
+    if (error instanceof Error && error.message.includes('already initialized')) {
+      isInitialized = true;
+    } else {
+      throw error;
+    }
+  }
+}
+
+/**
+ * Parses the `limit` query parameter.
+ *
+ * @param limitParam Raw parameter value, or `null` when absent.
+ * @returns `DEFAULT_LIMIT` when absent or empty, the value clamped to
+ *   `MAX_LIMIT` when it is a positive integer, or `null` when it is invalid.
+ */
+function parseLimit(limitParam: string | null): number | null {
+  if (limitParam === null || limitParam === '') {
+    return DEFAULT_LIMIT;
+  }
+
+  // parseInt alone would accept "10abc" and turn "abc" into NaN, which would
+  // then be passed straight to the LIMIT parameter of the search query.
+  if (!/^\d+$/.test(limitParam)) {
+    return null;
+  }
+
+  const parsed = Number.parseInt(limitParam, 10);
+  return parsed >= 1 ? Math.min(parsed, MAX_LIMIT) : null;
+}
+
+/**
+ * GET /api/search?q=query&limit=10
+ *
+ * Semantic search for conversations.
+ *
+ * Responds 400 when `q` is missing or blank, or when `limit` is not a
+ * positive integer; responds 500 when the search itself fails.
+ */
+export async function GET(req: NextRequest) {
+  try {
+    await ensureInitialized();
+
+    const { searchParams } = new URL(req.url);
+    const query = searchParams.get('q');
+
+    if (!query || query.trim().length === 0) {
+      return NextResponse.json(
+        { error: 'Query parameter "q" is required' },
+        { status: 400, headers: CORS_HEADERS }
+      );
+    }
+
+    const limit = parseLimit(searchParams.get('limit'));
+    if (limit === null) {
+      return NextResponse.json(
+        { error: 'Query parameter "limit" must be a positive integer' },
+        { status: 400, headers: CORS_HEADERS }
+      );
+    }
+
+    // Generate embedding for search query
+    console.log(`Searching for: "${query}"`);
+    const queryEmbedding = await generateEmbedding(query);
+
+    // Search for similar conversations
+    const results = await searchBySimilarity(queryEmbedding, limit);
+
+    console.log(`Found ${results.length} results`);
+
+    return NextResponse.json(
+      { query, results, count: results.length },
+      { headers: CORS_HEADERS }
+    );
+  } catch (error) {
+    console.error('Search error:', error);
+    return NextResponse.json(
+      { error: 'Search failed', details: error instanceof Error ? error.message : 'Unknown error' },
+      { status: 500, headers: CORS_HEADERS }
+    );
+  }
+}
+
+/**
+ * CORS preflight handler: answers 204 with the allowed origin, methods and headers.
+ */
+export async function OPTIONS() {
+  return new NextResponse(null, {
+    status: 204,
+    headers: {
+      'Access-Control-Allow-Origin': '*',
+      'Access-Control-Allow-Methods': 'GET, OPTIONS',
+      'Access-Control-Allow-Headers': 'Content-Type',
+    },
+  });
+}
diff --git a/app/page.tsx b/app/page.tsx index 9619f19..599d391 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -1,4 +1,4 @@ -import { HelpCircle } from 'lucide-react'; +import { Search } from 'lucide-react'; import { Card, CardContent, CardFooter } from '@/components/ui/card'; import { Avatar, AvatarFallback } from '@/components/ui/avatar'; import { Badge } from '@/components/ui/badge'; @@ -96,10 +96,12 @@ const Home = async () => {
+ +
diff --git a/app/search/page.tsx b/app/search/page.tsx new file mode 100644 index 0000000..547d277 --- /dev/null +++ b/app/search/page.tsx @@ -0,0 +1,70 @@ +'use client'; + +import { useState } from 'react'; +import { SearchBar } from '@/components/SearchBar'; +import { SearchResults } from '@/components/SearchResults'; +import { ConversationRecord } from '@/lib/db/types'; +import { ArrowLeft } from 'lucide-react'; +import Link from 'next/link'; + +export default function SearchPage() { + const [results, setResults] = useState([]); + const [loading, setLoading] = useState(false); + const [query, setQuery] = useState(''); + const [hasSearched, setHasSearched] = useState(false); + + const handleSearch = async (searchQuery: string) => { + setLoading(true); + setQuery(searchQuery); + setHasSearched(true); + + try { + const response = await fetch(`/api/search?q=${encodeURIComponent(searchQuery)}&limit=20`); + const data = await response.json(); + + if (response.ok) { + setResults(data.results || []); + } else { + console.error('Search failed:', data.error); + setResults([]); + } + } catch (error) { + console.error('Search error:', error); + setResults([]); + } finally { + setLoading(false); + } + }; + + return ( +
+
+
+ + + +
+ AI Archives - Search +
+
+
+ +
+
+

+ Semantic Search +

+

+ Find conversations by meaning, not just keywords +

+ + +
+ + {hasSearched && ( + + )} +
+
+ ); +} diff --git a/components/SearchBar.tsx b/components/SearchBar.tsx new file mode 100644 index 0000000..88f3e8d --- /dev/null +++ b/components/SearchBar.tsx @@ -0,0 +1,54 @@ +'use client'; + +import { useState } from 'react'; +import { Search, Loader2 } from 'lucide-react'; +import { Button } from '@/components/ui/button'; + +interface SearchBarProps { + onSearch: (query: string) => void; + loading?: boolean; +} + +export function SearchBar({ onSearch, loading = false }: SearchBarProps) { + const [query, setQuery] = useState(''); + + const handleSubmit = (e: React.FormEvent) => { + e.preventDefault(); + if (query.trim()) { + onSearch(query.trim()); + } + }; + + return ( +
+
+ + setQuery(e.target.value)} + placeholder="Search conversations... (e.g., 'React hooks', 'async await')" + className="w-full pl-12 pr-24 py-3 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500" + disabled={loading} + /> + +
+
+ ); +} diff --git a/components/SearchResults.tsx b/components/SearchResults.tsx new file mode 100644 index 0000000..ae4b372 --- /dev/null +++ b/components/SearchResults.tsx @@ -0,0 +1,81 @@ +'use client'; + +import { Card, CardContent, CardFooter } from '@/components/ui/card'; +import { Avatar, AvatarFallback } from '@/components/ui/avatar'; +import { Badge } from '@/components/ui/badge'; +import Link from 'next/link'; +import { ConversationRecord } from '@/lib/db/types'; + +interface SearchResultsProps { + results: ConversationRecord[]; + query: string; +} + +export function SearchResults({ results, query }: SearchResultsProps) { + if (results.length === 0) { + return ( + + +

+ {'No conversations found for "' + query + '"'} +

+

+ Try different keywords or add more conversations +

+
+
+ ); + } + + return ( +
+

+ {'Found ' + results.length + ' conversation' + (results.length !== 1 ? 's' : '') + ' matching "' + query + '"'} +

+
+ {results.map((conv) => { + const avatar = conv.model.charAt(0).toUpperCase(); + const daysDiff = Math.floor( + (new Date().getTime() - new Date(conv.createdAt).getTime()) / (1000 * 60 * 60 * 24) + ); + + return ( + + + +
+ + + {avatar} + + +
+

+ Anonymous +

+

+ AI Conversation +

+
+
+
+ +
+ + {conv.model} + +
+
+ {conv.views} Views + | + {daysDiff} Days ago +
+
+
+ + ); + })} +
+
+ ); +}
diff --git a/lib/db/embeddings.ts b/lib/db/embeddings.ts
new file mode 100644
index 0000000..e35bd99
--- /dev/null
+++ b/lib/db/embeddings.ts
@@ -0,0 +1,129 @@
+import { dbClient } from './client';
+import { ConversationRecord } from './types';
+
+/**
+ * Snake-case `conversations` columns selected by the queries in this module,
+ * typed after the `ConversationRecord` fields they are mapped to.
+ */
+interface ConversationRow {
+  id: ConversationRecord['id'];
+  model: ConversationRecord['model'];
+  scraped_at: ConversationRecord['scrapedAt'];
+  content_key: ConversationRecord['contentKey'];
+  source_html_bytes: ConversationRecord['sourceHtmlBytes'];
+  views: ConversationRecord['views'];
+  created_at: ConversationRecord['createdAt'];
+  updated_at: ConversationRecord['updatedAt'];
+}
+
+/**
+ * Converts one database row into the camel-case `ConversationRecord` shape.
+ */
+function toConversationRecord(row: ConversationRow): ConversationRecord {
+  return {
+    id: row.id,
+    model: row.model,
+    scrapedAt: row.scraped_at,
+    contentKey: row.content_key,
+    sourceHtmlBytes: row.source_html_bytes,
+    views: row.views,
+    createdAt: row.created_at,
+    updatedAt: row.updated_at,
+  };
+}
+
+/**
+ * Store embedding for a conversation and mark the row as embedded.
+ *
+ * @param conversationId `conversations.id` of the row to update.
+ * @param embedding Embedding values; sent as a JSON array and cast to `vector`.
+ */
+export async function storeEmbedding(
+  conversationId: string,
+  embedding: number[]
+): Promise<void> {
+  const pool = dbClient.getPool();
+
+  const query = `
+    UPDATE conversations
+    SET content_embedding = $1::vector,
+        embedding_generated = TRUE
+    WHERE id = $2
+  `;
+
+  await pool.query(query, [JSON.stringify(embedding), conversationId]);
+}
+
+/**
+ * Search conversations by semantic similarity.
+ *
+ * Only rows with `embedding_generated = TRUE` are considered; they are
+ * ordered by ascending `<=>` distance to the query embedding.
+ *
+ * @param queryEmbedding Embedding of the search text.
+ * @param limit Maximum number of rows to return (default 10).
+ * @returns Matching conversations, closest first.
+ */
+export async function searchBySimilarity(
+  queryEmbedding: number[],
+  limit: number = 10
+): Promise<ConversationRecord[]> {
+  const pool = dbClient.getPool();
+
+  // NOTE(review): `similarity` is selected but not copied into the returned
+  // records — confirm whether callers should receive it.
+  const query = `
+    SELECT
+      id,
+      model,
+      scraped_at,
+      content_key,
+      source_html_bytes,
+      views,
+      created_at,
+      updated_at,
+      1 - (content_embedding <=> $1::vector) as similarity
+    FROM conversations
+    WHERE embedding_generated = TRUE
+    ORDER BY content_embedding <=> $1::vector
+    LIMIT $2
+  `;
+
+  const result = await pool.query(query, [JSON.stringify(queryEmbedding), limit]);
+
+  return result.rows.map(toConversationRecord);
+}
+
+/**
+ * Get conversations without embeddings.
+ *
+ * Rows are returned oldest first (`created_at`, then `id`) so that repeated
+ * calls with the same `limit` see the same rows until they are embedded.
+ *
+ * @param limit Maximum number of rows to return (default 50).
+ */
+export async function getConversationsWithoutEmbeddings(
+  limit: number = 50
+): Promise<ConversationRecord[]> {
+  const pool = dbClient.getPool();
+
+  const query = `
+    SELECT
+      id,
+      model,
+      scraped_at,
+      content_key,
+      source_html_bytes,
+      views,
+      created_at,
+      updated_at
+    FROM conversations
+    WHERE embedding_generated = FALSE OR content_embedding IS NULL
+    ORDER BY created_at, id
+    LIMIT $1
+  `;
+
+  const result = await pool.query(query, [limit]);
+
+  return result.rows.map(toConversationRecord);
+}
diff --git a/lib/db/migrations/002_add_embeddings.sql b/lib/db/migrations/002_add_embeddings.sql
new file mode 100644
index 0000000..f9a8762
--- /dev/null
+++ b/lib/db/migrations/002_add_embeddings.sql
@@ -0,0 +1,13 @@
+-- Add vector column for embeddings (1536 dimensions for OpenAI text-embedding-3-small)
+ALTER TABLE conversations
+ADD COLUMN IF NOT EXISTS content_embedding vector(1536);
+
+-- Create index for fast similarity search
+CREATE INDEX IF NOT EXISTS idx_conversations_embedding
+ON conversations
+USING ivfflat (content_embedding vector_cosine_ops)
+WITH (lists = 100);
+
+-- Add column to track if embedding is generated
+ALTER TABLE conversations
+ADD COLUMN IF NOT EXISTS embedding_generated BOOLEAN DEFAULT FALSE;
diff --git a/lib/services/embeddings.ts b/lib/services/embeddings.ts
new file mode 100644
index 0000000..390684d
--- /dev/null
+++ b/lib/services/embeddings.ts
@@ -0,0 +1,86 @@
+import OpenAI from 'openai';
+
+/** Embedding model used for stored conversations and for search queries. */
+const EMBEDDING_MODEL = 'text-embedding-3-small';
+
+/** Default cap, in characters, on the text sent for embedding. */
+const MAX_EMBEDDING_CHARS = 8000;
+
+let openai: OpenAI | undefined;
+
+/**
+ * Returns the shared OpenAI client, creating it on first use.
+ *
+ * NOTE(review): the client is built lazily so that merely importing this
+ * module does not depend on OPENAI_API_KEY being set yet — confirm against
+ * the SDK's constructor behavior.
+ */
+function getOpenAIClient(): OpenAI {
+  if (!openai) {
+    openai = new OpenAI({
+      apiKey: process.env.OPENAI_API_KEY,
+    });
+  }
+  return openai;
+}
+
+interface ConversationMessage {
+  role: string;
+  content: string;
+}
+
+interface ConversationContent {
+  messages?: ConversationMessage[];
+}
+
+/**
+ * Generate embedding for text using OpenAI.
+ *
+ * @param text Text to embed.
+ * @returns The embedding of `text`.
+ * @throws Rethrows any request error after logging it, and throws when the
+ *   response contains no embedding.
+ */
+export async function generateEmbedding(text: string): Promise<number[]> {
+  try {
+    const response = await getOpenAIClient().embeddings.create({
+      model: EMBEDDING_MODEL,
+      input: text,
+    });
+
+    const first = response.data[0];
+    if (!first) {
+      throw new Error('Embedding response contained no data');
+    }
+
+    return first.embedding;
+  } catch (error) {
+    console.error('Error generating embedding:', error);
+    throw error;
+  }
+}
+
+/**
+ * Prepare conversation text for embedding.
+ *
+ * Joins every message as `role: content`, one per line, and truncates the
+ * result to `maxChars` characters.
+ *
+ * @param conversationContent Parsed conversation; a missing or non-array
+ *   `messages` yields an empty string.
+ * @param maxChars Maximum length of the returned text (default 8000).
+ */
+export function prepareTextForEmbedding(
+  conversationContent: ConversationContent,
+  maxChars: number = MAX_EMBEDDING_CHARS
+): string {
+  // Extract messages from conversation; tolerate a missing or malformed list
+  const rawMessages = conversationContent?.messages;
+  const messages = Array.isArray(rawMessages) ? rawMessages : [];
+
+  // Combine all messages into searchable text
+  return messages
+    .map((msg: ConversationMessage) => `${msg.role}: ${msg.content}`)
+    .join('\n')
+    .slice(0, maxChars);
+}
diff --git a/package-lock.json b/package-lock.json index 5058ea1..2482de2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,8 +15,10 @@ "@types/pg": "^8.15.4", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", + "dotenv": "^17.3.1", "lucide-react": "^0.509.0", "next": "15.3.2", + "openai": "^6.22.0", "pg": "^8.16.0", "react": "^19.0.0", "react-dom": "^19.0.0", @@ -32,6 +34,7 @@ "eslint": "^9", "eslint-config-next": "15.3.2", "tailwindcss": "^4", + "tsx": "^4.21.0", "tw-animate-css": "^1.2.9", "typescript": "^5" } @@ -981,6 +984,448 @@ "tslib": "^2.4.0" } }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.3.tgz", + "integrity": "sha512-9fJMTNFTWZMh5qwrBItuziu834eOCUcEqymSH7pY+zoMVEZg3gcPuBNxH1EvfVYe9h0x/Ptw8KBzv7qxb7l8dg==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.3.tgz", + "integrity": "sha512-i5D1hPY7GIQmXlXhs2w8AWHhenb00+GxjxRncS2ZM7YNVGNfaMxgzSGuO8o8SJzRc/oZwU2bcScvVERk03QhzA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.3.tgz", + "integrity": "sha512-YdghPYUmj/FX2SYKJ0OZxf+iaKgMsKHVPF1MAq/P8WirnSpCStzKJFjOjzsW0QQ7oIAiccHdcqjbHmJxRb/dmg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines":
{ + "node": ">=18" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.3.tgz", + "integrity": "sha512-IN/0BNTkHtk8lkOM8JWAYFg4ORxBkZQf9zXiEOfERX/CzxW3Vg1ewAhU7QSWQpVIzTW+b8Xy+lGzdYXV6UZObQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.3.tgz", + "integrity": "sha512-Re491k7ByTVRy0t3EKWajdLIr0gz2kKKfzafkth4Q8A5n1xTHrkqZgLLjFEHVD+AXdUGgQMq+Godfq45mGpCKg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.3.tgz", + "integrity": "sha512-vHk/hA7/1AckjGzRqi6wbo+jaShzRowYip6rt6q7VYEDX4LEy1pZfDpdxCBnGtl+A5zq8iXDcyuxwtv3hNtHFg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.3.tgz", + "integrity": "sha512-ipTYM2fjt3kQAYOvo6vcxJx3nBYAzPjgTCk7QEgZG8AUO3ydUhvelmhrbOheMnGOlaSFUoHXB6un+A7q4ygY9w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.3.tgz", + "integrity": "sha512-dDk0X87T7mI6U3K9VjWtHOXqwAMJBNN2r7bejDsc+j03SEjtD9HrOl8gVFByeM0aJksoUuUVU9TBaZa2rgj0oA==", + "cpu": [ + "x64" + ], + "dev": 
true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.3.tgz", + "integrity": "sha512-s6nPv2QkSupJwLYyfS+gwdirm0ukyTFNl3KTgZEAiJDd+iHZcbTPPcWCcRYH+WlNbwChgH2QkE9NSlNrMT8Gfw==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.3.tgz", + "integrity": "sha512-sZOuFz/xWnZ4KH3YfFrKCf1WyPZHakVzTiqji3WDc0BCl2kBwiJLCXpzLzUBLgmp4veFZdvN5ChW4Eq/8Fc2Fg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.3.tgz", + "integrity": "sha512-yGlQYjdxtLdh0a3jHjuwOrxQjOZYD/C9PfdbgJJF3TIZWnm/tMd/RcNiLngiu4iwcBAOezdnSLAwQDPqTmtTYg==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.3.tgz", + "integrity": "sha512-WO60Sn8ly3gtzhyjATDgieJNet/KqsDlX5nRC5Y3oTFcS1l0KWba+SEa9Ja1GfDqSF1z6hif/SkpQJbL63cgOA==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.3.tgz", + "integrity": 
"sha512-APsymYA6sGcZ4pD6k+UxbDjOFSvPWyZhjaiPyl/f79xKxwTnrn5QUnXR5prvetuaSMsb4jgeHewIDCIWljrSxw==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.3.tgz", + "integrity": "sha512-eizBnTeBefojtDb9nSh4vvVQ3V9Qf9Df01PfawPcRzJH4gFSgrObw+LveUyDoKU3kxi5+9RJTCWlj4FjYXVPEA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.3.tgz", + "integrity": "sha512-3Emwh0r5wmfm3ssTWRQSyVhbOHvqegUDRd0WhmXKX2mkHJe1SFCMJhagUleMq+Uci34wLSipf8Lagt4LlpRFWQ==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.3.tgz", + "integrity": "sha512-pBHUx9LzXWBc7MFIEEL0yD/ZVtNgLytvx60gES28GcWMqil8ElCYR4kvbV2BDqsHOvVDRrOxGySBM9Fcv744hw==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.3.tgz", + "integrity": "sha512-Czi8yzXUWIQYAtL/2y6vogER8pvcsOsk5cpwL4Gk5nJqH5UZiVByIY8Eorm5R13gq+DQKYg0+JyQoytLQas4dA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.27.3", + "resolved": 
"https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.3.tgz", + "integrity": "sha512-sDpk0RgmTCR/5HguIZa9n9u+HVKf40fbEUt+iTzSnCaGvY9kFP0YKBWZtJaraonFnqef5SlJ8/TiPAxzyS+UoA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.3.tgz", + "integrity": "sha512-P14lFKJl/DdaE00LItAukUdZO5iqNH7+PjoBm+fLQjtxfcfFE20Xf5CrLsmZdq5LFFZzb5JMZ9grUwvtVYzjiA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.3.tgz", + "integrity": "sha512-AIcMP77AvirGbRl/UZFTq5hjXK+2wC7qFRGoHSDrZ5v5b8DK/GYpXW3CPRL53NkvDqb9D+alBiC/dV0Fb7eJcw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.3.tgz", + "integrity": "sha512-DnW2sRrBzA+YnE70LKqnM3P+z8vehfJWHXECbwBmH/CU51z6FiqTQTHFenPlHmo3a8UgpLyH3PT+87OViOh1AQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openharmony-arm64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.3.tgz", + "integrity": "sha512-NinAEgr/etERPTsZJ7aEZQvvg/A6IsZG/LgZy+81wON2huV7SrK3e63dU0XhyZP4RKGyTm7aOgmQk0bGp0fy2g==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": 
{ + "node": ">=18" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.3.tgz", + "integrity": "sha512-PanZ+nEz+eWoBJ8/f8HKxTTD172SKwdXebZ0ndd953gt1HRBbhMsaNqjTyYLGLPdoWHy4zLU7bDVJztF5f3BHA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.3.tgz", + "integrity": "sha512-B2t59lWWYrbRDw/tjiWOuzSsFh1Y/E95ofKz7rIVYSQkUYBjfSgf6oeYPNWHToFRr2zx52JKApIcAS/D5TUBnA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.3.tgz", + "integrity": "sha512-QLKSFeXNS8+tHW7tZpMtjlNb7HKau0QDpwm49u0vUp9y1WOF+PEzkU84y9GqYaAVW8aH8f3GcBck26jh54cX4Q==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.3.tgz", + "integrity": "sha512-4uJGhsxuptu3OcpVAzli+/gWusVGwZZHTlS63hh++ehExkVT8SgiEf7/uC/PclrPPkLhZqGgCTjd0VWLo6xMqA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, "node_modules/@eslint-community/eslint-utils": { "version": "4.7.0", "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.7.0.tgz", @@ -1650,6 +2095,16 @@ "node": ">=18" } }, + "node_modules/@modelcontextprotocol/sdk/node_modules/zod": { + "version": "3.25.76", + "resolved": 
"https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, "node_modules/@napi-rs/wasm-runtime": { "version": "0.2.9", "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-0.2.9.tgz", @@ -2048,6 +2503,16 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/@shadcn/ui/node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, "node_modules/@smithy/abort-controller": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/@smithy/abort-controller/-/abort-controller-4.0.4.tgz", @@ -4547,6 +5012,18 @@ "node": ">=0.10.0" } }, + "node_modules/dotenv": { + "version": "17.3.1", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.3.1.tgz", + "integrity": "sha512-IO8C/dzEb6O3F9/twg6ZLXz164a2fhTnEWb95H23Dm4OuN+92NmEAlTrupP9VW6Jm3sO26tQlqyvyi4CsnY9GA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -4774,6 +5251,48 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/esbuild": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.3.tgz", + "integrity": "sha512-8VwMnyGCONIs6cWue2IdpHxHnAjzxnw2Zr7MkVxB2vjmQ2ivqGFb4LEG3SMnv0Gb2F/G/2yA8zUaiL1gywDCCg==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": 
">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.27.3", + "@esbuild/android-arm": "0.27.3", + "@esbuild/android-arm64": "0.27.3", + "@esbuild/android-x64": "0.27.3", + "@esbuild/darwin-arm64": "0.27.3", + "@esbuild/darwin-x64": "0.27.3", + "@esbuild/freebsd-arm64": "0.27.3", + "@esbuild/freebsd-x64": "0.27.3", + "@esbuild/linux-arm": "0.27.3", + "@esbuild/linux-arm64": "0.27.3", + "@esbuild/linux-ia32": "0.27.3", + "@esbuild/linux-loong64": "0.27.3", + "@esbuild/linux-mips64el": "0.27.3", + "@esbuild/linux-ppc64": "0.27.3", + "@esbuild/linux-riscv64": "0.27.3", + "@esbuild/linux-s390x": "0.27.3", + "@esbuild/linux-x64": "0.27.3", + "@esbuild/netbsd-arm64": "0.27.3", + "@esbuild/netbsd-x64": "0.27.3", + "@esbuild/openbsd-arm64": "0.27.3", + "@esbuild/openbsd-x64": "0.27.3", + "@esbuild/openharmony-arm64": "0.27.3", + "@esbuild/sunos-x64": "0.27.3", + "@esbuild/win32-arm64": "0.27.3", + "@esbuild/win32-ia32": "0.27.3", + "@esbuild/win32-x64": "0.27.3" + } + }, "node_modules/escape-html": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", @@ -5158,6 +5677,16 @@ "url": "https://opencollective.com/eslint" } }, + "node_modules/eslint/node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, "node_modules/espree": { "version": "10.3.0", "resolved": "https://registry.npmjs.org/espree/-/espree-10.3.0.tgz", @@ -5591,6 +6120,21 @@ "node": ">=14.14" } }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + 
"license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, "node_modules/function-bind": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", @@ -7486,6 +8030,27 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/openai": { + "version": "6.22.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.22.0.tgz", + "integrity": "sha512-7Yvy17F33Bi9RutWbsaYt5hJEEJ/krRPOrwan+f9aCPuMat1WVsb2VNSII5W1EksKT6fF69TG/xj4XzodK3JZw==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", @@ -8994,6 +9559,26 @@ "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "license": "0BSD" }, + "node_modules/tsx": { + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz", + "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": "~0.27.0", + "get-tsconfig": "^4.7.5" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + } + }, "node_modules/tw-animate-css": { "version": "1.2.9", "resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.2.9.tgz", @@ -9426,23 +10011,24 @@ } }, "node_modules/zod": { - "version": "3.24.4", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.4.tgz", - "integrity": "sha512-OdqJE9UDRPwWsrHjLN2F8bPxvwJBK22EHLWtanu0LSYr5YqzsaaW3RMgmjwr8Rypg5k+meEJdSPXJZXE/yqOMg==", - "dev": true, + "version": 
"4.3.6", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", + "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", + "devOptional": true, "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } }, "node_modules/zod-to-json-schema": { - "version": "3.24.5", - "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.24.5.tgz", - "integrity": "sha512-/AuWwMP+YqiPbsJx5D6TfgRTc4kTLjsh5SOcd4bLsfUg2RcEXrFMJl1DGgdHy2aCfsIA/cr/1JM0xcB2GZji8g==", + "version": "3.25.1", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.1.tgz", + "integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==", "dev": true, "license": "ISC", "peerDependencies": { - "zod": "^3.24.1" + "zod": "^3.25 || ^4" } } } diff --git a/package.json b/package.json index bd99d85..df3d39b 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,9 @@ "dev": "next dev --turbopack", "build": "next build", "start": "next start", - "lint": "next lint" + "lint": "next lint", + "generate-embeddings": "tsx scripts/generate-embeddings.ts", + "add-test-data": "tsx scripts/add-test-data.ts" }, "dependencies": { "@aws-sdk/client-s3": "^3.821.0", @@ -16,8 +18,10 @@ "@types/pg": "^8.15.4", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", + "dotenv": "^17.3.1", "lucide-react": "^0.509.0", "next": "15.3.2", + "openai": "^6.22.0", "pg": "^8.16.0", "react": "^19.0.0", "react-dom": "^19.0.0", @@ -33,6 +37,7 @@ "eslint": "^9", "eslint-config-next": "15.3.2", "tailwindcss": "^4", + "tsx": "^4.21.0", "tw-animate-css": "^1.2.9", "typescript": "^5" } diff --git a/scripts/add-test-data.ts b/scripts/add-test-data.ts new file mode 100644 index 0000000..b2206b1 --- /dev/null +++ b/scripts/add-test-data.ts @@ -0,0 +1,72 @@ +import 'dotenv/config'; +import { dbClient } from '../lib/db/client'; +import { s3Client } 
from '../lib/storage/s3'; +import { loadConfig } from '../lib/config'; +import { createConversationRecord } from '../lib/db/conversations'; +import { randomUUID } from 'crypto'; + +const testConversations = [ + { + model: 'ChatGPT', + content: { + messages: [ + { role: 'user', content: 'How do I use React hooks like useState and useEffect?' }, + { role: 'assistant', content: 'React hooks are functions that let you use state and lifecycle features in functional components. useState manages state, useEffect handles side effects.' } + ] + } + }, + { + model: 'Claude', + content: { + messages: [ + { role: 'user', content: 'Explain async/await in JavaScript' }, + { role: 'assistant', content: 'Async/await is syntactic sugar for Promises in JavaScript. It makes asynchronous code look synchronous and easier to read.' } + ] + } + }, + { + model: 'ChatGPT', + content: { + messages: [ + { role: 'user', content: 'Best practices for API design in Node.js' }, + { role: 'assistant', content: 'Here are key API design principles: RESTful conventions, proper error handling, versioning, authentication.' 
}
+      ]
+    }
+  }
+];
+/** Seeds S3 and the database with the sample conversations, then exits. */
+async function main() {
+  console.log('Adding test conversations...');
+
+  const config = loadConfig();
+  await dbClient.initialize(config.database);
+  s3Client.initialize(config.s3);
+
+  for (const conv of testConversations) {
+    const conversationId = randomUUID();
+
+    // Convert content to JSON string for S3
+    const contentString = JSON.stringify(conv.content);
+
+    const contentKey = await s3Client.storeConversation(conversationId, contentString);
+
+    await createConversationRecord({
+      model: conv.model,
+      scrapedAt: new Date(),
+      sourceHtmlBytes: Buffer.byteLength(contentString, 'utf8'), // bytes, not UTF-16 code units
+      views: 0,
+      contentKey,
+    });
+
+    console.log(`✓ Added ${conv.model} conversation`);
+  }
+
+  console.log('\nTest data added successfully!');
+  await dbClient.close();
+  process.exit(0);
+}
+
+main().catch(error => {
+  console.error('Error:', error);
+  process.exit(1);
+});
diff --git a/scripts/generate-embeddings.ts b/scripts/generate-embeddings.ts
new file mode 100644
index 0000000..eb49488
--- /dev/null
+++ b/scripts/generate-embeddings.ts
@@ -0,0 +1,84 @@
+import 'dotenv/config';
+import { dbClient } from '../lib/db/client';
+import { s3Client } from '../lib/storage/s3';
+import { loadConfig } from '../lib/config';
+import { getConversationsWithoutEmbeddings, storeEmbedding } from '../lib/db/embeddings';
+import { generateEmbedding, prepareTextForEmbedding } from '../lib/services/embeddings';
+
+/** Number of not-yet-attempted conversations requested per database round trip. */
+const BATCH_SIZE = 10;
+
+/** Pause after each stored embedding, in milliseconds. */
+const REQUEST_DELAY_MS = 200;
+
+/**
+ * Generates and stores an embedding for every conversation that lacks one,
+ * then closes the database connection and exits.
+ *
+ * A conversation that is skipped (no text) or fails stays without an
+ * embedding, so the next query returns it again. Its id is remembered in
+ * `unresolved` and filtered out of later batches; otherwise the loop would
+ * re-fetch the same rows forever.
+ */
+async function main() {
+  console.log('Starting embedding generation...');
+
+  const config = loadConfig();
+  await dbClient.initialize(config.database);
+  s3Client.initialize(config.s3);
+
+  let processed = 0;
+  let errors = 0;
+  const unresolved = new Set<string>();
+
+  while (true) {
+    // Request enough rows to reach past every unresolved conversation, then
+    // drop the ones already attempted in this run.
+    const candidates = await getConversationsWithoutEmbeddings(BATCH_SIZE + unresolved.size);
+    const conversations = candidates.filter(conv => !unresolved.has(conv.id));
+
+    if (conversations.length === 0) {
+      console.log('All conversations processed!');
+      break;
+    }
+
+    console.log(`Processing ${conversations.length} conversations...`);
+
+    for (const conv of conversations) {
+      try {
+        const contentString = await s3Client.getConversationContent(conv.contentKey);
+        const content = JSON.parse(contentString);
+
+        const text = prepareTextForEmbedding(content);
+
+        if (!text || text.trim().length === 0) {
+          console.log(`⚠ Skipping ${conv.id} - no text content`);
+          unresolved.add(conv.id);
+          continue;
+        }
+
+        const embedding = await generateEmbedding(text);
+        await storeEmbedding(conv.id, embedding);
+
+        processed++;
+        console.log(`✓ Processed conversation ${conv.id} (${processed} total)`);
+
+        await new Promise(resolve => setTimeout(resolve, REQUEST_DELAY_MS));
+
+      } catch (error) {
+        console.error(`✗ Error processing ${conv.id}:`, error);
+        unresolved.add(conv.id);
+        errors++;
+      }
+    }
+  }
+
+  console.log(`\nComplete! Processed: ${processed}, Errors: ${errors}`);
+  await dbClient.close();
+  process.exit(0);
+}
+
+main().catch(error => {
+  console.error('Fatal error:', error);
+  process.exit(1);
+});