diff --git a/demo/README.md b/demo/README.md new file mode 100644 index 0000000..79e2958 --- /dev/null +++ b/demo/README.md @@ -0,0 +1,18 @@ +# VortexDB Demos + +Example applications demonstrating VortexDB capabilities. + +## Available Demos + +### [clip-image-search](./clip-image-search/) + +Real-time image search using natural language. Stack: CLIP + React + FastAPI + +## Running a Demo + +Each demo is self-contained with its own `docker-compose.yml`: + +```bash +cd demo/clip-image-search +sudo docker compose up +``` diff --git a/demo/clip-image-search/.gitignore b/demo/clip-image-search/.gitignore new file mode 100644 index 0000000..5f8ecbd --- /dev/null +++ b/demo/clip-image-search/.gitignore @@ -0,0 +1,34 @@ +# Dependencies +node_modules/ +__pycache__/ +*.pyc +.venv/ +venv/ + +# Build outputs +dist/ +build/ + +# Images (downloaded separately) +backend/images/*.jpg +backend/images/*.jpeg +backend/images/*.png +backend/images/*.webp +backend/images/*.gif + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Logs +*.log + +# Environment +.env +.env.local + +# OS +.DS_Store +Thumbs.db diff --git a/demo/clip-image-search/README.md b/demo/clip-image-search/README.md new file mode 100644 index 0000000..30ed89f --- /dev/null +++ b/demo/clip-image-search/README.md @@ -0,0 +1,50 @@ +# CLIP Image Search Demo + +A simple image search demo powered by **VortexDB** using the CLIP embedding model. +Search images using natural language descriptions and see results update as you type. + + +## Quick Start + + +```bash +# From the repository root +cd demo/clip-image-search + +# Also downloads 100 demo images +sudo docker compose up +``` +The setup container will: +1. Wait for CLIP and VortexDB to be ready +2. Download 100 sample images from Lorem Picsum +3. Vectorize and index them into VortexDB +4. Exit when complete + +To change the number of images, edit `docker-compose.yml` and set `IMAGE_COUNT=50` (or any number). 
+ +### Manual Setup (Optional) + +If you prefer to run setup separately: + +```bash +# Start services only +sudo docker compose up -d clip-vectorizer vortexdb backend frontend + +# Run setup manually with custom count +./setup.sh --count 50 +``` + +## Adding Your Own Images + +### Via the Backend API + +```bash +# Index a single image +curl -X POST http://localhost:3001/index \ + -F "file=@/path/to/your/image.jpg" +``` + +### Via the Setup Script + +1. Copy images to `demo/clip-image-search/backend/images/` +2. Run `./setup.sh` (will index existing images without downloading new ones) diff --git a/demo/clip-image-search/backend/Dockerfile b/demo/clip-image-search/backend/Dockerfile new file mode 100644 index 0000000..2bae21c --- /dev/null +++ b/demo/clip-image-search/backend/Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install curl for healthcheck +RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/* + +COPY demo/clip-image-search/backend/requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy and install VortexDB Python client +COPY client/python /tmp/vortexdb-client +RUN pip install --no-cache-dir /tmp/vortexdb-client && rm -rf /tmp/vortexdb-client + +COPY demo/clip-image-search/backend/main.py . + +# Create images directory +RUN mkdir -p /app/images + +EXPOSE 3001 + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "3001"] diff --git a/demo/clip-image-search/backend/images/.gitkeep b/demo/clip-image-search/backend/images/.gitkeep new file mode 100644 index 0000000..a28aaba --- /dev/null +++ b/demo/clip-image-search/backend/images/.gitkeep @@ -0,0 +1,2 @@ +# Placeholder for sample images +# Run `./setup.sh` from the demo root to download sample images. 
def get_vortexdb_client() -> VortexDB:
    """Build a fresh VortexDB client from the module-level connection settings.

    The gRPC address comes from VORTEXDB_GRPC_URL. An empty VORTEXDB_API_KEY
    is handed to the client as ``None`` rather than as an empty string.
    """
    api_key = VORTEXDB_API_KEY or None
    return VortexDB(grpc_url=VORTEXDB_GRPC_URL, api_key=api_key)
@app.get("/stats", response_model=StatsResponse)
async def get_stats():
    """Report the on-disk image count and the health of CLIP and VortexDB.

    The CLIP vectorizer is probed with a GET on its ``/docs`` page: HTTP 200
    maps to "online", any other status to "error", and any exception to
    "offline". VortexDB is reported "online" when a client can be created and
    closed without raising, otherwise "offline".
    """

    def probe_vortexdb():
        # Runs on the thread pool so the client calls do not block the
        # event loop.
        try:
            handle = get_vortexdb_client()
            handle.close()
        except Exception:
            return "offline"
        return "online"

    async with httpx.AsyncClient(timeout=5.0) as http:
        try:
            docs_reply = await http.get(f"{CLIP_VECTORIZER_URL}/docs")
        except Exception:
            clip_state = "offline"
        else:
            if docs_reply.status_code == 200:
                clip_state = "online"
            else:
                clip_state = "error"

    vortex_state = await asyncio.get_event_loop().run_in_executor(
        executor, probe_vortexdb
    )

    # Only files with a recognised image extension are counted.
    image_suffixes = ('.png', '.jpg', '.jpeg', '.webp', '.gif')
    if os.path.exists(IMAGES_DIR):
        n_images = sum(
            1
            for entry in os.listdir(IMAGES_DIR)
            if entry.lower().endswith(image_suffixes)
        )
    else:
        n_images = 0

    return StatsResponse(
        total_images=n_images,
        clip_vectorizer_status=clip_state,
        vortexdb_status=vortex_state,
    )
@app.post("/index", response_model=IndexResponse)
async def index_image(file: UploadFile = File(...)):
    """
    Index a new image: vectorize with CLIP and store in VortexDB.

    The upload is saved under IMAGES_DIR, posted to the CLIP vectorizer's
    ``/vectors_img`` endpoint, and the returned vector is inserted into
    VortexDB with the saved file path as its image payload.

    Args:
        file: Multipart image upload.

    Returns:
        IndexResponse carrying the new point id plus the vectorize and insert
        durations in milliseconds.

    Raises:
        HTTPException: 503 when the CLIP vectorizer request or the VortexDB
            insert fails.
    """
    # The client-supplied filename is untrusted. Keep only its final path
    # component so a name such as "../../etc/passwd" cannot escape IMAGES_DIR.
    # Backslashes are normalised first because os.path.basename does not treat
    # them as separators on POSIX.
    filename = os.path.basename((file.filename or "").replace("\\", "/"))
    if filename in ("", ".", ".."):
        filename = f"image_{int(time.time())}.jpg"
    file_path = os.path.join(IMAGES_DIR, filename)

    content = await file.read()
    with open(file_path, "wb") as f:
        f.write(content)

    # Vectorize the image
    vectorize_start = time.perf_counter()
    async with httpx.AsyncClient(timeout=60.0) as client:
        try:
            files = {"file": (filename, content, file.content_type or "image/jpeg")}
            clip_response = await client.post(
                f"{CLIP_VECTORIZER_URL}/vectors_img",
                files=files
            )
            clip_response.raise_for_status()
            vector = clip_response.json()["result"]
        except httpx.HTTPError as e:
            raise HTTPException(
                status_code=503, detail=f"CLIP vectorizer error: {str(e)}"
            ) from e

    vectorize_time = (time.perf_counter() - vectorize_start) * 1000

    # Insert into VortexDB using the Python client. The client call is
    # synchronous, so it runs on the thread pool rather than the event loop.
    def do_insert():
        db = get_vortexdb_client()
        try:
            return db.insert(
                vector=DenseVector(vector),
                payload=Payload.image(file_path),
            )
        finally:
            db.close()

    insert_start = time.perf_counter()
    loop = asyncio.get_running_loop()
    try:
        point_id = await loop.run_in_executor(executor, do_insert)
    except Exception as e:
        raise HTTPException(
            status_code=503, detail=f"VortexDB error: {str(e)}"
        ) from e

    insert_time = (time.perf_counter() - insert_start) * 1000

    return IndexResponse(
        # IndexResponse.point_id is declared as str; convert explicitly, as
        # the /search handler does, in case the client returns a non-str id.
        point_id=str(point_id),
        vectorize_time_ms=round(vectorize_time, 2),
        insert_time_ms=round(insert_time, 2)
    )
asyncio.get_event_loop() + + async with httpx.AsyncClient(timeout=120.0) as client: + for filename in image_files: + file_path = os.path.join(IMAGES_DIR, filename) + try: + with open(file_path, "rb") as f: + content = f.read() + + # Vectorize + files = {"file": (filename, content, "image/jpeg")} + clip_response = await client.post( + f"{CLIP_VECTORIZER_URL}/vectors_img", + files=files + ) + clip_response.raise_for_status() + vector = clip_response.json()["result"] + + # Insert using the Python client + def do_insert(vector, file_path): + db = get_vortexdb_client() + try: + point_id = db.insert( + vector=DenseVector(vector), + payload=Payload.image(file_path), + ) + return point_id + finally: + db.close() + + point_id = await loop.run_in_executor( + executor, do_insert, vector, file_path + ) + results.append({"filename": filename, "point_id": point_id}) + + except Exception as e: + errors.append({"filename": filename, "error": str(e)}) + + return { + "indexed": len(results), + "results": results, + "errors": errors + } + + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=3001) diff --git a/demo/clip-image-search/backend/requirements.txt b/demo/clip-image-search/backend/requirements.txt new file mode 100644 index 0000000..51fdb94 --- /dev/null +++ b/demo/clip-image-search/backend/requirements.txt @@ -0,0 +1,8 @@ +fastapi>=0.104.0 +uvicorn[standard]>=0.24.0 +httpx>=0.25.0 +python-multipart>=0.0.6 +pydantic>=2.0.0 +grpcio>=1.60.0 +grpcio-tools>=1.60.0 +protobuf>=4.25.0 diff --git a/demo/clip-image-search/clip-vectorizer/Dockerfile b/demo/clip-image-search/clip-vectorizer/Dockerfile new file mode 100644 index 0000000..fc04eb1 --- /dev/null +++ b/demo/clip-image-search/clip-vectorizer/Dockerfile @@ -0,0 +1,24 @@ +FROM python:3.11-slim + +WORKDIR /src + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements first for caching 
+COPY requirements.txt /src/ + +# Install PyTorch CPU version (works everywhere, simpler for demo) +RUN pip3 install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu + +# Install other requirements +RUN pip3 install --no-cache-dir -r requirements.txt + +# Copy application code +COPY app/ /src/app/ + +EXPOSE 8080 + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8080"] diff --git a/demo/clip-image-search/clip-vectorizer/app/main.py b/demo/clip-image-search/clip-vectorizer/app/main.py new file mode 100644 index 0000000..338a735 --- /dev/null +++ b/demo/clip-image-search/clip-vectorizer/app/main.py @@ -0,0 +1,77 @@ +from io import BytesIO + +from PIL import Image +from fastapi import FastAPI, File, UploadFile +from pydantic import BaseModel +import torch +from torch.nn.functional import normalize +from transformers import CLIPProcessor, CLIPModel + +MODEL_ID = "openai/clip-vit-base-patch32" + +device = "cuda" if torch.cuda.is_available() else "cpu" +print(f"Using device: {device}") + +if device == "cuda": + model = CLIPModel.from_pretrained(MODEL_ID, torch_dtype=torch.float16) +else: + model = CLIPModel.from_pretrained(MODEL_ID) + +processor = CLIPProcessor.from_pretrained(MODEL_ID, clean_up_tokenization_spaces=True) +model.to(device) +model.eval() + +app = FastAPI() + + +class TextInput(BaseModel): + text: str + + +@app.post("/vectors") +async def generate_text_embedding(text_input: TextInput): + label_tokens = processor( + text=text_input.text, + padding=True, + images=None, + return_tensors='pt' + ).to(device) + + with torch.no_grad(): + label_embeddings = model.get_text_features(**label_tokens) + + if hasattr(label_embeddings, 'pooler_output'): + label_embeddings = label_embeddings.pooler_output + elif hasattr(label_embeddings, 'last_hidden_state'): + label_embeddings = label_embeddings.last_hidden_state[:, 0, :] + + label_embeddings = normalize(label_embeddings, p=2, dim=1) + label_embeddings = 
@app.post("/vectors_img")
async def generate_image_embedding(file: UploadFile = File(...)):
    """Return the L2-normalized CLIP embedding of an uploaded image.

    Args:
        file: Multipart image upload; it is decoded with Pillow and converted
            to RGB before preprocessing.

    Returns:
        ``{"result": [...]}`` where the list is the embedding of the single
        uploaded image.

    Raises:
        HTTPException: 400 when the upload cannot be decoded as an image.
    """
    # Local import: the module-level fastapi import does not bring in
    # HTTPException.
    from fastapi import HTTPException

    file_bytes = await file.read()
    try:
        img = Image.open(BytesIO(file_bytes)).convert("RGB")
    except OSError as exc:
        # Pillow signals unreadable or truncated image data with OSError
        # (UnidentifiedImageError is a subclass). Report it as a client error
        # instead of an unhandled 500.
        raise HTTPException(
            status_code=400, detail=f"Invalid image upload: {exc}"
        ) from exc

    pixel_values = processor(
        text=None,
        images=img,
        return_tensors='pt'
    )['pixel_values']
    # Match the model's device AND dtype. The model is loaded in float16 when
    # CUDA is available, so moving only the device would feed it a tensor of
    # a different floating-point type.
    pixel_values = pixel_values.to(device=device, dtype=model.dtype)

    with torch.no_grad():
        image_embeddings = model.get_image_features(pixel_values)

    # Some return types wrap the features in an output object; unwrap those.
    if hasattr(image_embeddings, 'pooler_output'):
        image_embeddings = image_embeddings.pooler_output
    elif hasattr(image_embeddings, 'last_hidden_state'):
        image_embeddings = image_embeddings.last_hidden_state[:, 0, :]

    image_embeddings = normalize(image_embeddings, p=2, dim=1)
    image_embeddings = image_embeddings.detach().cpu().tolist()
    return {"result": image_embeddings[0]}
+ dockerfile: Dockerfile + ports: + - "8081:3000" + - "50051:50051" + environment: + - RUST_LOG=info + - HTTP_PORT=3000 + - GRPC_PORT=50051 + - STORAGE_TYPE=inmemory + - INDEX_TYPE=hnsw + - GRPC_ROOT_PASSWORD=demo_password_123 + - DIMENSION=512 + volumes: + - vortexdb-data:/data + restart: unless-stopped + + backend: + build: + context: ../.. + dockerfile: demo/clip-image-search/backend/Dockerfile + ports: + - "3001:3001" + environment: + - CLIP_VECTORIZER_URL=http://clip-vectorizer:8080 + - VORTEXDB_GRPC_URL=vortexdb:50051 + - VORTEXDB_API_KEY=demo_password_123 + - IMAGES_DIR=/app/images + volumes: + - ./backend/images:/app/images + depends_on: + - clip-vectorizer + - vortexdb + restart: unless-stopped + + frontend: + build: + context: ./frontend + dockerfile: Dockerfile + ports: + - "3000:80" + depends_on: + - backend + restart: unless-stopped + + setup: + build: + context: ./setup + dockerfile: Dockerfile + environment: + - CLIP_VECTORIZER_URL=http://clip-vectorizer:8080 + - VORTEXDB_URL=http://vortexdb:3000 + - IMAGES_DIR=/app/images + - IMAGE_COUNT=100 + volumes: + - ./backend/images:/app/images + depends_on: + - clip-vectorizer + - vortexdb + restart: "no" + +volumes: + vortexdb-data: diff --git a/demo/clip-image-search/frontend/Dockerfile b/demo/clip-image-search/frontend/Dockerfile new file mode 100644 index 0000000..2c64fe3 --- /dev/null +++ b/demo/clip-image-search/frontend/Dockerfile @@ -0,0 +1,23 @@ +# Build stage +FROM node:20-alpine AS build + +WORKDIR /app + +COPY package.json ./ +RUN npm install + +COPY . . 
+RUN npm run build + +# Production stage +FROM nginx:alpine + +# Copy built assets +COPY --from=build /app/dist /usr/share/nginx/html + +# Copy nginx config +COPY nginx.conf /etc/nginx/conf.d/default.conf + +EXPOSE 80 + +CMD ["nginx", "-g", "daemon off;"] diff --git a/demo/clip-image-search/frontend/index.html b/demo/clip-image-search/frontend/index.html new file mode 100644 index 0000000..5758b8e --- /dev/null +++ b/demo/clip-image-search/frontend/index.html @@ -0,0 +1,16 @@ + + + + + + + CLIP Image Search - VortexDB Demo + + + + + +
+ + + diff --git a/demo/clip-image-search/frontend/nginx.conf b/demo/clip-image-search/frontend/nginx.conf new file mode 100644 index 0000000..dc5800c --- /dev/null +++ b/demo/clip-image-search/frontend/nginx.conf @@ -0,0 +1,35 @@ +server { + listen 80; + server_name localhost; + root /usr/share/nginx/html; + index index.html; + + # Serve static files + location / { + try_files $uri $uri/ /index.html; + } + + # Proxy API requests to backend + location /api/ { + proxy_pass http://backend:3001/; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + # Proxy image requests to backend + location /images/ { + proxy_pass http://backend:3001/images/; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + } + + # Gzip compression + gzip on; + gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript; +} diff --git a/demo/clip-image-search/frontend/package.json b/demo/clip-image-search/frontend/package.json new file mode 100644 index 0000000..1430a51 --- /dev/null +++ b/demo/clip-image-search/frontend/package.json @@ -0,0 +1,22 @@ +{ + "name": "clip-image-search", + "private": true, + "version": "0.1.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "tsc && vite build", + "preview": "vite preview" + }, + "dependencies": { + "react": "^18.2.0", + "react-dom": "^18.2.0" + }, + "devDependencies": { + "@types/react": "^18.2.43", + "@types/react-dom": "^18.2.17", + "@vitejs/plugin-react": "^4.2.1", + "typescript": "^5.2.2", + "vite": "^5.0.8" + } +} diff --git a/demo/clip-image-search/frontend/public/logo_horizontal.svg b/demo/clip-image-search/frontend/public/logo_horizontal.svg new file mode 100644 index 
/**
 * Return a copy of `value` that only updates after `value` has stayed
 * unchanged for `delay` milliseconds.
 *
 * Each change to `value` or `delay` cancels the pending timer and starts a
 * new one, so rapid changes collapse into a single update.
 */
function useDebounce<T>(value: T, delay: number): T {
  const [debouncedValue, setDebouncedValue] = useState<T>(value)

  useEffect(() => {
    const timer = setTimeout(() => setDebouncedValue(value), delay)

    // Cleanup runs before the next effect and on unmount, dropping any
    // update that has not fired yet.
    return () => clearTimeout(timer)
  }, [value, delay])

  return debouncedValue
}
err.message : 'Search failed') + setResults([]) + } finally { + setLoading(false) + } + }, []) + + useEffect(() => { + performSearch(debouncedQuery) + }, [debouncedQuery, performSearch]) + + const handleSuggestionClick = (suggestion: string) => { + setQuery(suggestion) + inputRef.current?.focus() + } + + const getSpeedClass = (ms: number) => { + if (ms < 50) return 'fast' + if (ms < 200) return 'medium' + return 'slow' + } + + return ( +
+
+
+
+
+ VortexDB + / + Image Search +
+
+
+ + CLIP +
+
+ + VortexDB +
+ {stats && ( +
+ {stats.total_images} images +
+ )} +
+
+ +
+
+ + setQuery(e.target.value)} + autoFocus + /> +
+
+ + {searchStats && ( +
+
+ Vectorize: + + {searchStats.vectorize_time_ms.toFixed(0)}ms + +
+
+ Search: + + {searchStats.search_time_ms.toFixed(0)}ms + +
+
+ Total: + + {searchStats.total_time_ms.toFixed(0)}ms + +
+
+ Results: + {results.length} +
+
+ )} +
+
+ +
+ {error && ( +
+ ! +

{error}

+
+ )} + + {loading && ( +
+
+

Searching...

+
+ )} + + {!loading && !error && results.length > 0 && ( + <> +
+

+ Showing {results.length} results for "{debouncedQuery}" +

+
+
+ {results.map((result, index) => ( +
+ {`Search { + (e.target as HTMLImageElement).src = 'data:image/svg+xml,No Image' + }} + /> +
+
+ ID: {result.point_id.slice(0, 8)}... +
+
+
+ ))} +
+ + )} + + {!loading && !error && results.length === 0 && !query && ( +
+
+

Semantic Image Search

+

Describe what you're looking for. Results update as you type.

+ +
+

Suggestions

+
+ {SUGGESTIONS.map((suggestion) => ( + + ))} +
+
+
+ )} + + {!loading && !error && results.length === 0 && query && ( +
+
?
+

No Results

+

No images matched "{query}". Try a different description.

+
+ )} +
+ +
+

+ Powered by VortexDB +

+
+
+ ) +} + +export default App diff --git a/demo/clip-image-search/frontend/src/index.css b/demo/clip-image-search/frontend/src/index.css new file mode 100644 index 0000000..a0a3ac8 --- /dev/null +++ b/demo/clip-image-search/frontend/src/index.css @@ -0,0 +1,523 @@ +:root { + --bg-primary: #0a0a0f; + --bg-secondary: #0f0f15; + --bg-tertiary: #16161e; + --text-primary: #ffffff; + --text-secondary: #9ca3af; + --text-muted: #6b7280; + --accent: #00d4aa; + --accent-hover: #00e6b8; + --accent-dim: rgba(0, 212, 170, 0.15); + --success: #22c55e; + --warning: #f59e0b; + --error: #ef4444; + --border: #1f2937; + --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.4); +} + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; + background: var(--bg-primary); + color: var(--text-primary); + min-height: 100vh; + line-height: 1.6; +} + +#root { + min-height: 100vh; +} + +.app { + min-height: 100vh; + display: flex; + flex-direction: column; +} + +/* Header */ +.header { + background: var(--bg-secondary); + border-bottom: 1px solid var(--border); + padding: 1rem 2rem; + position: sticky; + top: 0; + z-index: 100; +} + +.header-content { + max-width: 1200px; + margin: 0 auto; + display: flex; + flex-direction: column; + gap: 1.25rem; +} + +.header-top { + display: flex; + justify-content: space-between; + align-items: center; +} + +.logo { + display: flex; + align-items: center; + gap: 0.75rem; +} + +.logo-text { + font-size: 1.25rem; + font-weight: 600; + color: var(--accent); +} + +.logo-divider { + color: var(--text-muted); + font-weight: 300; + font-size: 1.25rem; +} + +.logo-subtitle { + font-size: 1.25rem; + font-weight: 400; + color: var(--text-secondary); +} + +.stats-badges { + display: flex; + gap: 0.75rem; + align-items: center; +} + +.badge { + display: flex; + align-items: center; + gap: 0.375rem; + padding: 0.5rem 0.875rem; + background: transparent; + border: 1px solid 
var(--border); + border-radius: 0; + font-size: 0.875rem; + color: var(--text-muted); +} + +.badge .dot { + width: 6px; + height: 6px; + border-radius: 50%; +} + +.badge .dot.online { + background: var(--success); +} + +.badge .dot.offline { + background: var(--error); +} + +/* Search Container */ +.search-container { + position: relative; +} + +.search-input-wrapper { + position: relative; + display: flex; + align-items: center; +} + +.search-icon { + position: absolute; + left: 1rem; + width: 1.125rem; + height: 1.125rem; + background-color: var(--text-muted); + mask-image: url('/search-icon.svg'); + mask-size: contain; + mask-repeat: no-repeat; + mask-position: center; + -webkit-mask-image: url('/search-icon.svg'); + -webkit-mask-size: contain; + -webkit-mask-repeat: no-repeat; + -webkit-mask-position: center; + pointer-events: none; +} + +.search-input { + width: 100%; + padding: 1rem 1.25rem 1rem 3rem; + font-size: 1rem; + background: var(--bg-tertiary); + border: 1px solid var(--border); + border-radius: 0; + color: var(--text-primary); + transition: all 0.15s ease; +} + +.search-input:focus { + outline: none; + border-color: var(--accent); +} + +.search-input::placeholder { + color: var(--text-muted); +} + +/* Performance Stats */ +.performance-stats { + display: flex; + gap: 1.5rem; + padding: 0.75rem 1.25rem; + background: var(--bg-tertiary); + border-radius: 0; + border: 1px solid var(--border); + font-size: 0.875rem; +} + +.stat { + display: flex; + align-items: center; + gap: 0.5rem; +} + +.stat-label { + color: var(--text-muted); +} + +.stat-value { + font-weight: 500; + color: var(--accent); +} + +.stat-value.fast { + color: var(--success); +} + +.stat-value.medium { + color: var(--warning); +} + +.stat-value.slow { + color: var(--error); +} + +/* Main Content */ +.main-content { + flex: 1; + padding: 1.5rem 2rem; + max-width: 1200px; + margin: 0 auto; + width: 100%; +} + +/* Results Info */ +.results-info { + margin-bottom: 1rem; + display: flex; + 
justify-content: space-between; + align-items: center; +} + +.results-count { + color: var(--text-muted); + font-size: 0.875rem; +} + +.results-count strong { + color: var(--text-secondary); + font-weight: 500; +} + +/* Image Grid */ +.image-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(200px, 1fr)); + gap: 1rem; +} + +.image-card { + position: relative; + aspect-ratio: 1; + border-radius: 0; + overflow: hidden; + background: var(--bg-secondary); + border: 1px solid var(--border); + transition: all 0.15s ease; + cursor: pointer; +} + +.image-card:hover { + border-color: var(--accent); +} + +.image-card img { + width: 100%; + height: 100%; + object-fit: cover; +} + +.image-card-overlay { + position: absolute; + bottom: 0; + left: 0; + right: 0; + background: linear-gradient(to top, rgba(0,0,0,0.7) 0%, transparent 100%); + opacity: 0; + transition: opacity 0.15s ease; + padding: 0.75rem; +} + +.image-card:hover .image-card-overlay { + opacity: 1; +} + +.image-card-info { + color: var(--text-secondary); + font-size: 0.75rem; +} + +/* Loading State */ +.loading-container { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + padding: 4rem; + gap: 1rem; +} + +.spinner { + width: 48px; + height: 48px; + border: 3px solid var(--border); + border-top-color: var(--accent); + border-radius: 50%; + animation: spin 1s linear infinite; +} + +@keyframes spin { + to { + transform: rotate(360deg); + } +} + +/* Empty State */ +.empty-state { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + padding: 3rem; + text-align: center; + gap: 0.75rem; +} + +.empty-state-icon { + width: 3rem; + height: 3rem; + display: flex; + align-items: center; + justify-content: center; + border: 1px solid var(--border); + margin-bottom: 0.5rem; +} + +.empty-state-icon span { + width: 1.5rem; + height: 1.5rem; + background-color: var(--text-muted); + mask-image: url('/search-icon.svg'); + 
mask-size: contain; + mask-repeat: no-repeat; + mask-position: center; + -webkit-mask-image: url('/search-icon.svg'); + -webkit-mask-size: contain; + -webkit-mask-repeat: no-repeat; + -webkit-mask-position: center; +} + +.empty-state-icon--empty span { + width: auto; + height: auto; + background: none; + mask-image: none; + -webkit-mask-image: none; + font-size: 1.5rem; + color: var(--text-muted); +} + +.empty-state h2 { + font-size: 1.375rem; + font-weight: 500; + color: var(--text-primary); +} + +.empty-state p { + color: var(--text-muted); + white-space: nowrap; + font-size: 1rem; +} + +/* Error State */ +.error-state { + display: flex; + align-items: center; + gap: 0.5rem; + padding: 0.75rem 1rem; + color: var(--error); + font-size: 0.875rem; +} + +.error-icon { + display: flex; + align-items: center; + justify-content: center; + width: 1.25rem; + height: 1.25rem; + border: 1px solid var(--error); + border-radius: 50%; + font-size: 0.75rem; + font-weight: 600; + flex-shrink: 0; +} + +.error-state p { + margin: 0; +} + +/* Suggestions */ +.suggestions { + margin-top: 1.5rem; +} + +.suggestions h3 { + font-size: 0.75rem; + color: var(--text-muted); + margin-bottom: 0.75rem; + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.suggestion-chips { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; + justify-content: center; + max-width: 600px; +} + +.suggestion-chip { + padding: 0.625rem 1.25rem; + background: transparent; + border: 1px solid var(--border); + border-radius: 0; + color: var(--text-muted); + cursor: pointer; + transition: all 0.15s ease; + font-size: 0.875rem; +} + +.suggestion-chip:hover { + border-color: var(--accent); + color: var(--accent); +} + +/* Footer */ +.footer { + padding: 1rem 2rem; + text-align: center; + color: var(--text-muted); + font-size: 0.875rem; + border-top: 1px solid var(--border); +} + +.footer a { + color: var(--accent); + text-decoration: none; +} + +.footer strong { + color: var(--accent); +} + +/* Responsive */ 
@media (max-width: 768px) {
  .header {
    padding: 1rem;
  }

  .header-top {
    flex-direction: column;
    gap: 1rem;
  }

  .stats-badges {
    flex-wrap: wrap;
    justify-content: center;
  }

  .performance-stats {
    flex-wrap: wrap;
    gap: 1rem;
  }

  .main-content {
    padding: 1rem;
  }

  .image-grid {
    grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
    gap: 1rem;
  }

  /* The base `.empty-state p` rule sets `white-space: nowrap`, which forces
     the message onto one line and overflows narrow viewports. Let it wrap. */
  .empty-state p {
    white-space: normal;
  }
}

/* Upload Modal */
.upload-area {
  margin: 2rem 0;
  padding: 2rem;
  border: 2px dashed var(--border);
  border-radius: 0;
  text-align: center;
  transition: all 0.3s ease;
  cursor: pointer;
}

.upload-area:hover,
.upload-area.drag-over {
  border-color: var(--accent);
  background: rgba(99, 102, 241, 0.05);
}

/* The native file input is hidden; the surrounding area is the click/drop target. */
.upload-area input {
  display: none;
}

.upload-icon {
  font-size: 3rem;
  margin-bottom: 1rem;
}

/* Skeleton loading */
.skeleton {
  background: linear-gradient(90deg, var(--bg-tertiary) 0%, var(--bg-secondary) 50%, var(--bg-tertiary) 100%);
  background-size: 200% 100%;
  animation: shimmer 1.5s infinite;
}

/* Slides the double-width gradient across the element to produce the shimmer. */
@keyframes shimmer {
  0% {
    background-position: 200% 0;
  }
  100% {
    background-position: -200% 0;
  }
}

.image-skeleton {
  aspect-ratio: 1;
  border-radius: 0;
}
diff --git a/demo/clip-image-search/frontend/src/main.tsx b/demo/clip-image-search/frontend/src/main.tsx
new file mode 100644
index 0000000..964aeb4
--- /dev/null
+++ b/demo/clip-image-search/frontend/src/main.tsx
@@ -0,0 +1,10 @@
import React from 'react'
import ReactDOM from 'react-dom/client'
import App from './App'
import './index.css'

// Mount the app on the #root element; the non-null assertion mirrors the
// original code and assumes index.html always provides that element.
ReactDOM.createRoot(document.getElementById('root')!).render(
  <React.StrictMode>
    <App />
  </React.StrictMode>,
)
diff --git a/demo/clip-image-search/frontend/src/vite-env.d.ts b/demo/clip-image-search/frontend/src/vite-env.d.ts
new file mode 100644
index 0000000..11f02fe
--- /dev/null
+++ b/demo/clip-image-search/frontend/src/vite-env.d.ts
@@ -0,0 +1 @@
/// <reference types="vite/client" />
diff --git a/demo/clip-image-search/frontend/tsconfig.json b/demo/clip-image-search/frontend/tsconfig.json
new file mode 100644
index 0000000..3934b8f
--- /dev/null
+++ b/demo/clip-image-search/frontend/tsconfig.json
@@ -0,0 +1,21 @@
{
  "compilerOptions": {
    "target": "ES2020",
    "useDefineForClassFields": true,
    "lib": ["ES2020", "DOM", "DOM.Iterable"],
    "module": "ESNext",
    "skipLibCheck": true,
    "moduleResolution": "bundler",
    "allowImportingTsExtensions": true,
    "resolveJsonModule": true,
    "isolatedModules": true,
    "noEmit": true,
    "jsx": "react-jsx",
    "strict": true,
    "noUnusedLocals": true,
    "noUnusedParameters": true,
    "noFallthroughCasesInSwitch": true
  },
  "include": ["src"],
  "references": [{ "path": "./tsconfig.node.json" }]
}
diff --git a/demo/clip-image-search/frontend/tsconfig.node.json b/demo/clip-image-search/frontend/tsconfig.node.json
new file mode 100644
index 0000000..97ede7e
--- /dev/null
+++ b/demo/clip-image-search/frontend/tsconfig.node.json
@@ -0,0 +1,11 @@
{
  "compilerOptions": {
    "composite": true,
    "skipLibCheck": true,
    "module": "ESNext",
    "moduleResolution": "bundler",
    "allowSyntheticDefaultImports": true,
    "strict": true
  },
  "include": ["vite.config.ts"]
}
diff --git a/demo/clip-image-search/frontend/vite.config.ts b/demo/clip-image-search/frontend/vite.config.ts
new file mode 100644
index 0000000..c6fa47c
--- /dev/null
+++ b/demo/clip-image-search/frontend/vite.config.ts
@@ -0,0 +1,20 @@
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'

export default defineConfig({
  plugins: [react()],
  server: {
    port: 3000,
    proxy: {
      // /api/* is forwarded to the backend with the /api prefix stripped.
      '/api': {
        target: 'http://localhost:3001',
        changeOrigin: true,
        rewrite: (path) => path.replace(/^\/api/, '')
      },
      // /images/* is forwarded to the backend unchanged.
      '/images': {
        target: 'http://localhost:3001',
        changeOrigin: true
      }
    }
  }
})
diff --git a/demo/clip-image-search/setup.sh b/demo/clip-image-search/setup.sh
new file mode 100755
index 0000000..478ca97
--- /dev/null
+++ b/demo/clip-image-search/setup.sh
@@ -0,0 +1,283 @@
+#!/bin/bash +set -e + +# CLIP Image Search Demo - Setup Script +# Downloads sample images and indexes them into VortexDB + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +IMAGES_DIR="${SCRIPT_DIR}/backend/images" +CLIP_URL="${CLIP_VECTORIZER_URL:-http://localhost:5000}" +VORTEXDB_URL="${VORTEXDB_URL:-http://localhost:8081}" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +echo -e "${BLUE}╔════════════════════════════════════════════════════════════╗${NC}" +echo -e "${BLUE}║ CLIP Image Search Demo - Setup Script ║${NC}" +echo -e "${BLUE}╚════════════════════════════════════════════════════════════╝${NC}" +echo "" + +# Create images directory +mkdir -p "$IMAGES_DIR" + +# Function to check if services are running +check_services() { + echo -e "${YELLOW}Checking services...${NC}" + + # Check CLIP Vectorizer + if curl -s "${CLIP_URL}/docs" > /dev/null 2>&1; then + echo -e "${GREEN}✓ CLIP Vectorizer is running at ${CLIP_URL}${NC}" + else + echo -e "${RED}✗ CLIP Vectorizer is not reachable at ${CLIP_URL}${NC}" + echo -e "${YELLOW} Make sure docker compose is running: sudo docker compose up -d${NC}" + return 1 + fi + + # Check VortexDB + if curl -s "${VORTEXDB_URL}/health" > /dev/null 2>&1; then + echo -e "${GREEN}✓ VortexDB is running at ${VORTEXDB_URL}${NC}" + else + echo -e "${RED}✗ VortexDB is not reachable at ${VORTEXDB_URL}${NC}" + echo -e "${YELLOW} Make sure docker compose is running: sudo docker compose up -d${NC}" + return 1 + fi + + return 0 +} + +# Function to download images from Lorem Picsum (free, no API key needed) +download_images() { + local count=${1:-100} + echo "" + echo -e "${BLUE}═══════════════════════════════════════════════════════════════${NC}" + echo -e "${BLUE} Downloading ${count} sample images from Lorem Picsum...${NC}" + echo -e "${BLUE}═══════════════════════════════════════════════════════════════${NC}" + echo "" + + # Categories for diverse 
images (using seed words for consistent results) + local categories=( + "nature" "city" "people" "food" "tech" "animal" "architecture" + "art" "travel" "sports" "fashion" "music" "business" "health" + "ocean" "mountain" "forest" "desert" "snow" "beach" "sunset" + "car" "bike" "train" "plane" "boat" "street" "building" + "cat" "dog" "bird" "flower" "tree" "sky" "cloud" + "coffee" "book" "laptop" "phone" "camera" "guitar" "piano" + ) + + local downloaded=0 + local failed=0 + local i=1 + + while [ $downloaded -lt $count ]; do + # Get category based on current index + local cat_index=$((i % ${#categories[@]})) + local category="${categories[$cat_index]}" + local seed="${category}${i}" + local filename="${category}_$(printf '%03d' $i).jpg" + local filepath="${IMAGES_DIR}/${filename}" + + if [ -f "$filepath" ]; then + echo -e " [${downloaded}/${count}] ${YELLOW}Skipping${NC} ${filename} (already exists)" + ((downloaded++)) + else + # Download from Lorem Picsum with seed for reproducible images + # Using 640x640 for good quality but reasonable size + local url="https://picsum.photos/seed/${seed}/640/640" + + if curl -sL -o "$filepath" "$url" 2>/dev/null; then + # Verify it's a valid image (check file size > 1KB) + if [ -s "$filepath" ] && [ $(stat -f%z "$filepath" 2>/dev/null || stat -c%s "$filepath" 2>/dev/null) -gt 1024 ]; then + echo -e " [${downloaded}/${count}] ${GREEN}Downloaded${NC} ${filename}" + ((downloaded++)) + else + rm -f "$filepath" + echo -e " [${downloaded}/${count}] ${RED}Failed${NC} ${filename} (invalid image)" + ((failed++)) + fi + else + echo -e " [${downloaded}/${count}] ${RED}Failed${NC} ${filename}" + ((failed++)) + fi + fi + + ((i++)) + + # Prevent infinite loop + if [ $i -gt $((count * 2)) ]; then + echo -e "${YELLOW}Warning: Too many failures, stopping download${NC}" + break + fi + + # Small delay to be nice to the server + sleep 0.1 + done + + echo "" + echo -e "${GREEN}Downloaded: ${downloaded} images${NC}" + if [ $failed -gt 0 ]; then + echo 
-e "${RED}Failed: ${failed} images${NC}" + fi +} + +# Function to index images into VortexDB +index_images() { + echo "" + echo -e "${BLUE}═══════════════════════════════════════════════════════════════${NC}" + echo -e "${BLUE} Indexing images into VortexDB...${NC}" + echo -e "${BLUE}═══════════════════════════════════════════════════════════════${NC}" + echo "" + + local total=0 + local indexed=0 + local failed=0 + local total_vectorize_time=0 + local total_insert_time=0 + + # Count total images + for img in "${IMAGES_DIR}"/*.{jpg,jpeg,png,webp,gif} 2>/dev/null; do + [ -f "$img" ] && ((total++)) + done + + if [ $total -eq 0 ]; then + echo -e "${RED}No images found in ${IMAGES_DIR}${NC}" + return 1 + fi + + echo -e "Found ${total} images to index" + echo "" + + for img in "${IMAGES_DIR}"/*.{jpg,jpeg,png,webp,gif} 2>/dev/null; do + [ -f "$img" ] || continue + + local filename=$(basename "$img") + local current=$((indexed + failed + 1)) + + # Step 1: Vectorize with CLIP + local vec_start=$(date +%s%3N) + local vector_response=$(curl -s -X POST "${CLIP_URL}/vectors_img" \ + -F "file=@${img}" \ + -H "accept: application/json") + local vec_end=$(date +%s%3N) + local vec_time=$((vec_end - vec_start)) + + # Extract vector from response + local vector=$(echo "$vector_response" | grep -o '"result":\[[^]]*\]' | sed 's/"result"://') + + if [ -z "$vector" ] || [ "$vector" = "null" ]; then + echo -e " [${current}/${total}] ${RED}✗${NC} ${filename} (vectorization failed)" + ((failed++)) + continue + fi + + # Step 2: Insert into VortexDB + local ins_start=$(date +%s%3N) + local insert_response=$(curl -s -X POST "${VORTEXDB_URL}/points" \ + -H "Content-Type: application/json" \ + -d "{ + \"vector\": ${vector}, + \"payload\": { + \"content_type\": \"Image\", + \"content\": \"${img}\" + } + }") + local ins_end=$(date +%s%3N) + local ins_time=$((ins_end - ins_start)) + + # Check if insert was successful + local point_id=$(echo "$insert_response" | grep -o '"point_id":"[^"]*"' | sed 
's/"point_id":"\([^"]*\)"/\1/') + + if [ -n "$point_id" ]; then + echo -e " [${current}/${total}] ${GREEN}✓${NC} ${filename} (CLIP: ${vec_time}ms | DB: ${ins_time}ms)" + ((indexed++)) + total_vectorize_time=$((total_vectorize_time + vec_time)) + total_insert_time=$((total_insert_time + ins_time)) + else + echo -e " [${current}/${total}] ${RED}✗${NC} ${filename} (insert failed)" + ((failed++)) + fi + done + + echo "" + echo -e "${BLUE}═══════════════════════════════════════════════════════════════${NC}" + echo -e "${GREEN} Indexing Complete!${NC}" + echo -e "${BLUE}═══════════════════════════════════════════════════════════════${NC}" + echo "" + echo -e " ${GREEN}Indexed:${NC} ${indexed} images" + if [ $failed -gt 0 ]; then + echo -e " ${RED}Failed:${NC} ${failed} images" + fi + + if [ $indexed -gt 0 ]; then + local avg_vec=$((total_vectorize_time / indexed)) + local avg_ins=$((total_insert_time / indexed)) + echo "" + echo -e " ${BLUE}Performance:${NC}" + echo -e " Avg CLIP vectorization: ${avg_vec}ms" + echo -e " Avg VortexDB insert: ${avg_ins}ms" + echo -e " Total time: $((total_vectorize_time + total_insert_time))ms" + fi +} + +# Main execution +main() { + local skip_download=false + local image_count=100 + + # Parse arguments + while [[ $# -gt 0 ]]; do + case $1 in + --skip-download) + skip_download=true + shift + ;; + --count) + image_count=$2 + shift 2 + ;; + -h|--help) + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " --skip-download Skip image download, only index existing images" + echo " --count N Number of images to download (default: 100)" + echo " -h, --help Show this help message" + exit 0 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac + done + + # Check services first + if ! 
check_services; then + echo "" + echo -e "${RED}Please start the services first:${NC}" + echo -e " cd ${SCRIPT_DIR}" + echo -e " sudo docker compose up -d" + echo -e " # Wait ~60 seconds for CLIP model to load" + echo -e " $0" + exit 1 + fi + + # Download images + if [ "$skip_download" = false ]; then + download_images $image_count + fi + + # Index images + index_images + + echo "" + echo -e "${GREEN}╔════════════════════════════════════════════════════════════╗${NC}" + echo -e "${GREEN}║ Setup Complete! Open http://localhost:3000 to try it out ║${NC}" + echo -e "${GREEN}╚════════════════════════════════════════════════════════════╝${NC}" + echo "" +} + +main "$@" diff --git a/demo/clip-image-search/setup/Dockerfile b/demo/clip-image-search/setup/Dockerfile new file mode 100644 index 0000000..ec0b80f --- /dev/null +++ b/demo/clip-image-search/setup/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install curl for downloads and healthchecks +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY setup_images.py . + +# Default to 100 images +ENV IMAGE_COUNT=100 +ENV CLIP_VECTORIZER_URL=http://clip-vectorizer:8080 +ENV VORTEXDB_URL=http://vortexdb:3000 +ENV IMAGES_DIR=/app/images + +CMD ["python", "setup_images.py"] diff --git a/demo/clip-image-search/setup/requirements.txt b/demo/clip-image-search/setup/requirements.txt new file mode 100644 index 0000000..278a1cc --- /dev/null +++ b/demo/clip-image-search/setup/requirements.txt @@ -0,0 +1,2 @@ +httpx>=0.25.0 +pillow>=10.0.0 diff --git a/demo/clip-image-search/setup/setup_images.py b/demo/clip-image-search/setup/setup_images.py new file mode 100644 index 0000000..96396ab --- /dev/null +++ b/demo/clip-image-search/setup/setup_images.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python3 +""" +Setup script that downloads sample images and indexes them into VortexDB. 
+Runs as a Docker container after services are ready. +""" + +import os +import sys +import time +import httpx +from pathlib import Path +from concurrent.futures import ThreadPoolExecutor + +# Configuration from environment +CLIP_URL = os.getenv("CLIP_VECTORIZER_URL", "http://clip-vectorizer:8080") +VORTEXDB_URL = os.getenv("VORTEXDB_URL", "http://vortexdb:3000") +IMAGES_DIR = os.getenv("IMAGES_DIR", "/app/images") +IMAGE_COUNT = int(os.getenv("IMAGE_COUNT", "100")) + +# Categories for diverse images +CATEGORIES = [ + "nature", "city", "people", "food", "tech", "animal", "architecture", + "art", "travel", "sports", "fashion", "music", "business", "health", + "ocean", "mountain", "forest", "desert", "snow", "beach", "sunset", + "car", "bike", "train", "plane", "boat", "street", "building", + "cat", "dog", "bird", "flower", "tree", "sky", "cloud", + "coffee", "book", "laptop", "phone", "camera", "guitar", "piano" +] + + +def print_banner(): + print("=" * 60) + print(" CLIP Image Search Demo - Auto Setup") + print("=" * 60) + print() + + +def wait_for_services(max_retries=60, delay=5): + """Wait for CLIP and VortexDB services to be ready.""" + print("⏳ Waiting for services to be ready...") + + for attempt in range(max_retries): + clip_ready = False + vortex_ready = False + + try: + with httpx.Client(timeout=5.0) as client: + # Check CLIP + try: + resp = client.get(f"{CLIP_URL}/docs") + clip_ready = resp.status_code == 200 + except Exception: + pass + + # Check VortexDB + try: + resp = client.get(f"{VORTEXDB_URL}/health") + vortex_ready = resp.status_code == 200 + except Exception: + pass + except Exception: + pass + + if clip_ready and vortex_ready: + print(f"✅ CLIP Vectorizer ready at {CLIP_URL}") + print(f"✅ VortexDB ready at {VORTEXDB_URL}") + return True + + status = [] + if clip_ready: + status.append("CLIP ✓") + else: + status.append("CLIP ✗") + if vortex_ready: + status.append("VortexDB ✓") + else: + status.append("VortexDB ✗") + + print(f" Attempt {attempt + 
1}/{max_retries}: {' | '.join(status)}") + time.sleep(delay) + + print("❌ Services did not become ready in time") + return False + + +def download_image(seed: str, filepath: str) -> bool: + """Download a single image from Lorem Picsum.""" + if os.path.exists(filepath): + return True + + url = f"https://picsum.photos/seed/{seed}/640/640" + try: + with httpx.Client(timeout=30.0, follow_redirects=True) as client: + resp = client.get(url) + if resp.status_code == 200 and len(resp.content) > 1024: + with open(filepath, "wb") as f: + f.write(resp.content) + return True + except Exception as e: + print(f" Download error for {seed}: {e}") + return False + + +def download_images(count: int) -> int: + """Download sample images from Lorem Picsum.""" + print() + print("=" * 60) + print(f" Downloading {count} sample images...") + print("=" * 60) + print() + + os.makedirs(IMAGES_DIR, exist_ok=True) + + downloaded = 0 + i = 1 + + while downloaded < count and i <= count * 2: + cat_index = i % len(CATEGORIES) + category = CATEGORIES[cat_index] + seed = f"{category}{i}" + filename = f"{category}_{i:03d}.jpg" + filepath = os.path.join(IMAGES_DIR, filename) + + if download_image(seed, filepath): + downloaded += 1 + if downloaded % 10 == 0: + print(f" Downloaded: {downloaded}/{count}") + + i += 1 + time.sleep(0.1) # Be nice to the server + + print(f"\n✅ Downloaded {downloaded} images") + return downloaded + + +def index_images() -> tuple[int, int]: + """Index all images into VortexDB using CLIP.""" + print() + print("=" * 60) + print(" Indexing images into VortexDB...") + print("=" * 60) + print() + + image_files = list(Path(IMAGES_DIR).glob("*.jpg")) + \ + list(Path(IMAGES_DIR).glob("*.jpeg")) + \ + list(Path(IMAGES_DIR).glob("*.png")) + + if not image_files: + print("❌ No images found to index") + return 0, 0 + + print(f" Found {len(image_files)} images to index") + + indexed = 0 + failed = 0 + total_time = 0 + + with httpx.Client(timeout=120.0) as client: + for i, img_path in 
enumerate(image_files, 1): + try: + start = time.perf_counter() + + # Vectorize with CLIP + with open(img_path, "rb") as f: + files = {"file": (img_path.name, f, "image/jpeg")} + clip_resp = client.post(f"{CLIP_URL}/vectors_img", files=files) + + if clip_resp.status_code != 200: + print(f" [{i}/{len(image_files)}] ❌ {img_path.name} (CLIP error)") + failed += 1 + continue + + vector = clip_resp.json().get("result") + if not vector: + print(f" [{i}/{len(image_files)}] ❌ {img_path.name} (no vector)") + failed += 1 + continue + + # Insert into VortexDB + insert_resp = client.post( + f"{VORTEXDB_URL}/points", + json={ + "vector": vector, + "payload": { + "content_type": "Image", + "content": str(img_path) + } + } + ) + + elapsed = (time.perf_counter() - start) * 1000 + total_time += elapsed + + if insert_resp.status_code in (200, 201): + indexed += 1 + if indexed % 10 == 0: + avg = total_time / indexed + print(f" [{i}/{len(image_files)}] Indexed {indexed} images (avg: {avg:.0f}ms)") + else: + print(f" [{i}/{len(image_files)}] ❌ {img_path.name} (DB error: {insert_resp.status_code})") + failed += 1 + + except Exception as e: + print(f" [{i}/{len(image_files)}] ❌ {img_path.name}: {e}") + failed += 1 + + print() + print(f"✅ Indexed: {indexed} images") + if failed > 0: + print(f"❌ Failed: {failed} images") + if indexed > 0: + print(f"⚡ Average time per image: {total_time / indexed:.0f}ms") + + return indexed, failed + + +def main(): + print_banner() + + # Wait for services + if not wait_for_services(): + sys.exit(1) + + print() + + # Download images + downloaded = download_images(IMAGE_COUNT) + + if downloaded == 0: + print("❌ No images downloaded, exiting") + sys.exit(1) + + # Index images + indexed, failed = index_images() + + print() + print("=" * 60) + print(" 🎉 Setup Complete!") + print("=" * 60) + print() + print(" Open http://localhost:3000 to try the demo") + print() + + # Keep container running briefly so logs can be seen + time.sleep(5) + + +if __name__ == 
"__main__": + main()