From 1365da3b5afcf43a935b62c40d21bc97f75bbeee Mon Sep 17 00:00:00 2001 From: Filippo Stanghellini Date: Tue, 10 Mar 2026 19:54:41 +0100 Subject: [PATCH 1/6] feat: optimize indexing method and update UI --- src/docfinder/embedding/encoder.py | 38 +- src/docfinder/index/indexer.py | 41 +- src/docfinder/index/storage.py | 27 +- src/docfinder/web/app.py | 179 ++-- src/docfinder/web/templates/index.html | 1299 +++++++++++++----------- 5 files changed, 830 insertions(+), 754 deletions(-) diff --git a/src/docfinder/embedding/encoder.py b/src/docfinder/embedding/encoder.py index 981d753..2ffb91e 100644 --- a/src/docfinder/embedding/encoder.py +++ b/src/docfinder/embedding/encoder.py @@ -2,7 +2,6 @@ from __future__ import annotations -import gc import logging import platform import sys @@ -131,7 +130,7 @@ def detect_optimal_backend() -> tuple[Literal["torch", "onnx"], str | None]: @dataclass(slots=True) class EmbeddingConfig: model_name: str = DEFAULT_MODEL - batch_size: int = 8 + batch_size: int = 32 normalize: bool = True backend: Literal["torch", "onnx", "openvino"] | None = None onnx_model_file: str | None = None @@ -214,34 +213,15 @@ def _log_backend_info(self) -> None: def embed(self, texts: Sequence[str] | Iterable[str]) -> np.ndarray: """Return float32 embeddings for input texts.""" - sentences = list(texts) - - # Processa in mini-batch per ridurre memoria - all_embeddings = [] - mini_batch_size = 4 # Processa solo 4 testi alla volta - - for i in range(0, len(sentences), mini_batch_size): - batch = sentences[i : i + mini_batch_size] - embeddings = self._model.encode( - batch, - batch_size=self.config.batch_size, - show_progress_bar=False, - convert_to_numpy=True, - normalize_embeddings=self.config.normalize, - ) - all_embeddings.append(embeddings) - - # Libera memoria dopo ogni mini-batch - gc.collect() - - # Concatena tutti i risultati - result = np.vstack(all_embeddings).astype("float32", copy=False) - - # Pulizia finale - gc.collect() - - return result + embeddings = self._model.encode( + sentences, + batch_size=self.config.batch_size, + show_progress_bar=False, + convert_to_numpy=True, + normalize_embeddings=self.config.normalize, + ) + return np.asarray(embeddings, dtype="float32") def embed_query(self, text: str) -> np.ndarray: """Convenience wrapper for single-query embedding.""" diff --git a/src/docfinder/index/indexer.py b/src/docfinder/index/indexer.py index b9024ba..bee397f 100644 --- a/src/docfinder/index/indexer.py +++ b/src/docfinder/index/indexer.py @@ -6,7 +6,7 @@ import logging from dataclasses import dataclass, field from pathlib import Path -from typing import Sequence +from typing import Callable, Sequence from docfinder.embedding.encoder import EmbeddingModel from docfinder.index.storage import SQLiteVectorStore @@ -52,11 +52,13 @@ def __init__( *, chunk_chars: int = 1200, overlap: int = 200, + progress_callback: Callable[[int, int, str], None] | None = None, ) -> None: self.embedder = embedder self.store = store self.chunk_chars = chunk_chars self.overlap = overlap + self.progress_callback = progress_callback def index(self, paths: Sequence[Path]) -> IndexStats: """Index all PDFs found under the given paths.""" @@ -65,28 +67,25 @@ def index(self, paths: Sequence[Path]) -> IndexStats: LOGGER.warning("No PDF files found") return IndexStats() + total = len(pdf_files) stats = IndexStats() - # Processa solo 2 file alla volta per ridurre memoria - batch_size = 2 - - for i in range(0, len(pdf_files), batch_size): - batch = pdf_files[i : i + batch_size] - - for path in batch: - try: - LOGGER.info(f"Processing: {path}") - status = self._index_single(path) - stats.increment(status, path) - - except Exception as e: - LOGGER.error(f"Failed to process {path}: {e}") - stats.failed += 1 - stats.processed_files.append(path) - - # Libera memoria dopo ogni batch + for i, path in enumerate(pdf_files): + if self.progress_callback: + self.progress_callback(i, total, str(path)) + try: + LOGGER.info(f"Processing: {path}") + status = self._index_single(path) + stats.increment(status, path) + except Exception as e: + LOGGER.error(f"Failed to process {path}: {e}") + stats.failed += 1 + stats.processed_files.append(path) gc.collect() + if self.progress_callback: + self.progress_callback(total, total, "") + return stats def _index_single(self, path: Path) -> str: @@ -120,7 +119,7 @@ def _index_single(self, path: Path) -> str: return status # Process chunks in batches - batch_size = 32 + batch_size = 64 current_batch = [] # Chain the first chunk back with the rest @@ -131,12 +130,10 @@ def _index_single(self, path: Path) -> str: embeddings = self.embedder.embed([c.text for c in current_batch]) self.store.insert_chunks(doc_id, current_batch, embeddings) current_batch = [] - gc.collect() # Process remaining chunks if current_batch: embeddings = self.embedder.embed([c.text for c in current_batch]) self.store.insert_chunks(doc_id, current_batch, embeddings) - gc.collect() return status diff --git a/src/docfinder/index/storage.py b/src/docfinder/index/storage.py index 5e78593..e43c3a6 100644 --- a/src/docfinder/index/storage.py +++ b/src/docfinder/index/storage.py @@ -139,21 +139,20 @@ def insert_chunks( if embeddings.shape[0] != len(chunks): raise ValueError("Embeddings and chunks length mismatch") - conn = self._conn - for chunk, vector in zip(chunks, embeddings): - conn.execute( - """ - INSERT INTO chunks(document_id, chunk_index, text, metadata, embedding) - VALUES (?, ?, ?, ?, ?) - """, - ( - doc_id, - chunk.index, - chunk.text, - json.dumps(chunk.metadata, ensure_ascii=True), - sqlite3.Binary(np.asarray(vector, dtype="float32").tobytes()), - ), + data = [ + ( + doc_id, + chunk.index, + chunk.text, + json.dumps(chunk.metadata, ensure_ascii=True), + sqlite3.Binary(np.asarray(vector, dtype="float32").tobytes()), ) + for chunk, vector in zip(chunks, embeddings) + ] + self._conn.executemany( + "INSERT INTO chunks(document_id, chunk_index, text, metadata, embedding) VALUES (?, ?, ?, ?, ?)", + data, + ) def upsert_document( self, diff --git a/src/docfinder/web/app.py b/src/docfinder/web/app.py index 2df2580..a2935db 100644 --- a/src/docfinder/web/app.py +++ b/src/docfinder/web/app.py @@ -7,6 +7,9 @@ import os import subprocess import sys +import threading +import uuid +from contextlib import asynccontextmanager from pathlib import Path from typing import Any, List @@ -23,7 +26,35 @@ LOGGER = logging.getLogger(__name__) -app = FastAPI(title="DocFinder Web", version="1.1.1") +# ── Singleton EmbeddingModel ───────────────────────────────────────────────── +_embedder: EmbeddingModel | None = None +_embedder_lock = threading.Lock() + + +def _get_embedder() -> EmbeddingModel: + """Return a cached EmbeddingModel, creating it on first call.""" + global _embedder + if _embedder is None: + with _embedder_lock: + if _embedder is None: + config = AppConfig() + _embedder = EmbeddingModel(EmbeddingConfig(model_name=config.model_name)) + return _embedder + + +# ── Async indexing job registry ─────────────────────────────────────────────── +_index_jobs: dict[str, dict] = {} + + +@asynccontextmanager +async def lifespan(app: FastAPI): + logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s") + # Pre-load the embedding model at startup so the first request is instant + await asyncio.to_thread(_get_embedder) + yield + + +app = FastAPI(title="DocFinder Web", version="1.1.1", lifespan=lifespan) app.add_middleware( CORSMiddleware, allow_origins=["*"], @@ -65,11 +96,6 @@ def _ensure_db_parent(db_path: Path) -> None: db_path.parent.mkdir(parents=True, exist_ok=True) -@app.on_event("startup") -async def startup_event() -> None: - logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s") - - @app.post("/search") async def search_documents(payload: SearchPayload) -> dict[str, List[SearchResult]]: query = payload.query.strip() @@ -86,7 +112,7 @@ async def search_documents(payload: SearchPayload) -> dict[str, List[SearchResul "Please index some documents first using the 'Index folder or PDF' section above.", ) - embedder = EmbeddingModel(EmbeddingConfig(model_name=AppConfig().model_name)) + embedder = _get_embedder() store = SQLiteVectorStore(resolved_db, dimension=embedder.dimension) searcher = Searcher(embedder, store) results = searcher.search(query, top_k=top_k) @@ -122,7 +148,7 @@ async def list_documents(db: Path | None = None) -> dict[str, Any]: "stats": {"document_count": 0, "chunk_count": 0, "total_size_bytes": 0}, } - embedder = EmbeddingModel(EmbeddingConfig(model_name=AppConfig().model_name)) + embedder = _get_embedder() store = SQLiteVectorStore(resolved_db, dimension=embedder.dimension) try: documents = store.list_documents() @@ -140,7 +166,7 @@ async def cleanup_missing_files(db: Path | None = None) -> dict[str, Any]: if not resolved_db.exists(): raise HTTPException(status_code=404, detail="Database not found") - embedder = EmbeddingModel(EmbeddingConfig(model_name=AppConfig().model_name)) + embedder = _get_embedder() store = SQLiteVectorStore(resolved_db, dimension=embedder.dimension) try: removed_count = store.remove_missing_files() @@ -157,7 +183,7 @@ async def delete_document_by_id(doc_id: int, db: Path | None = None) -> dict[str if not resolved_db.exists(): raise HTTPException(status_code=404, detail="Database not found") - embedder = EmbeddingModel(EmbeddingConfig(model_name=AppConfig().model_name)) + embedder = _get_embedder() store = SQLiteVectorStore(resolved_db, dimension=embedder.dimension) try: deleted = store.delete_document(doc_id) @@ -180,7 +206,7 @@ async def delete_document(payload: DeleteDocumentRequest, db: Path | None = None if not resolved_db.exists(): raise HTTPException(status_code=404, detail="Database not found") - embedder = EmbeddingModel(EmbeddingConfig(model_name=AppConfig().model_name)) + embedder = _get_embedder() store = SQLiteVectorStore(resolved_db, dimension=embedder.dimension) try: if payload.doc_id is not None: @@ -196,14 +222,27 @@ async def delete_document(payload: DeleteDocumentRequest, db: Path | None = None return {"status": "ok"} -def _run_index_job(paths: List[Path], config: AppConfig, resolved_db: Path) -> dict[str, Any]: - embedder = EmbeddingModel(EmbeddingConfig(model_name=config.model_name)) +def _run_index_job( + paths: List[Path], + config: AppConfig, + resolved_db: Path, + job: dict | None = None, +) -> dict[str, Any]: + embedder = _get_embedder() + + def _progress(processed: int, total: int, current_file: str) -> None: + if job is not None: + job["processed"] = processed + job["total"] = total + job["current_file"] = current_file + store = SQLiteVectorStore(resolved_db, dimension=embedder.dimension) indexer = Indexer( embedder, store, chunk_chars=config.chunk_chars, overlap=config.overlap, + progress_callback=_progress, ) try: stats = indexer.index(paths) @@ -219,98 +258,100 @@ def _run_index_job(paths: List[Path], config: AppConfig, resolved_db: Path) -> d } -@app.post("/index") -async def index_documents(payload: IndexPayload) -> dict[str, Any]: +def _validate_index_paths(payload: "IndexPayload") -> List[Path]: + """Validate and resolve paths from an IndexPayload. Raises HTTPException on error.""" logger = logging.getLogger(__name__) - sanitized_paths = [p.replace("\r", "").replace("\n", "") for p in payload.paths] - logger.info("DEBUG: Received paths = %s", sanitized_paths) - logger.info("DEBUG: Path type = %s", type(payload.paths)) - - if not payload.paths: - raise HTTPException(status_code=400, detail="No path provided") - - config_defaults = AppConfig() - config = AppConfig( - db_path=Path(payload.db) if payload.db is not None else config_defaults.db_path, - model_name=payload.model or config_defaults.model_name, - chunk_chars=payload.chunk_chars or config_defaults.chunk_chars, - overlap=payload.overlap or config_defaults.overlap, - ) - - resolved_db = config.resolve_db_path(Path.cwd()) - _ensure_db_parent(resolved_db) - - # Security: Define safe base directory for path traversal protection - # User can only access directories within their home directory or an explicitly allowed path - # For now, we allow access to the entire filesystem as the user is expected to be trusted - # In production, you might want to restrict this to specific directories - # IMPORTANT: Use canonical (real) path to prevent symlink-based bypasses safe_base_dir = Path(os.path.realpath(str(Path.home()))) + resolved_paths: List[Path] = [] - # Validate and resolve paths safely - resolved_paths = [] for p in payload.paths: - # Sanitize input: remove newlines and carriage returns clean_path = p.strip().replace("\r", "").replace("\n", "") if not clean_path: continue - - # Security: Reject paths with null bytes or other dangerous characters if "\0" in clean_path: raise HTTPException(status_code=400, detail="Invalid path: contains null byte") try: - # Step 1: Expand user directory first expanded_path = os.path.expanduser(clean_path) - - # Step 2: Use os.path.realpath for secure path resolution (prevents symlink attacks) - # This also resolves relative paths and removes .. components real_path = os.path.realpath(expanded_path) - # Step 3: Additional security check - verify it's an absolute path if not os.path.isabs(real_path): raise HTTPException(status_code=400, detail="Invalid path: must be absolute") - # Step 4: CRITICAL SECURITY CHECK - Verify path is within safe base directory - # We use canonical string prefix comparison for maximum robustness: - # - Both paths are already fully resolved via os.path.realpath - # - String prefix check works across all Python versions - # - Avoids edge cases with is_relative_to() and symlinked parents - # - Ensures path cannot escape the allowed directory (e.g., /etc/passwd) - # Add path separator to prevent partial matches (e.g., /home/user vs /home/user2) safe_base_str = str(safe_base_dir) + os.sep real_path_str = real_path + os.sep - if not real_path_str.startswith(safe_base_str): raise HTTPException( status_code=403, detail="Access denied: path is outside allowed directory", ) - # Step 5: Create Path object from the validated canonical path - # This breaks the taint chain for CodeQL static analysis validated_path = Path(real_path) - - # Step 6: Now that path is validated, perform filesystem operations if not validated_path.exists(): raise HTTPException(status_code=404, detail="Path not found: %s" % clean_path) - - # Step 7: Verify it's a directory (not a file) if not validated_path.is_dir(): raise HTTPException( status_code=400, detail="Path must be a directory: %s" % clean_path ) - resolved_paths.append(validated_path) except (ValueError, OSError) as e: logger.error("Invalid path '%s': %s", clean_path, e) raise HTTPException(status_code=400, detail="Invalid path: %s" % clean_path) - try: - stats = await asyncio.to_thread(_run_index_job, resolved_paths, config, resolved_db) - except Exception as exc: # pragma: no cover - defensive - LOGGER.exception("Indexing failed: %s", exc) - raise HTTPException(status_code=500, detail=str(exc)) from exc + return resolved_paths - return {"status": "ok", "db": str(resolved_db), "stats": stats} + +@app.post("/index") +async def index_documents(payload: IndexPayload) -> dict[str, Any]: + """Start an indexing job and return its ID immediately for progress polling.""" + if not payload.paths: + raise HTTPException(status_code=400, detail="No path provided") + + config_defaults = AppConfig() + config = AppConfig( + db_path=Path(payload.db) if payload.db is not None else config_defaults.db_path, + model_name=payload.model or config_defaults.model_name, + chunk_chars=payload.chunk_chars or config_defaults.chunk_chars, + overlap=payload.overlap or config_defaults.overlap, + ) + resolved_db = config.resolve_db_path(Path.cwd()) + _ensure_db_parent(resolved_db) + + resolved_paths = _validate_index_paths(payload) + + job_id = str(uuid.uuid4()) + job: dict[str, Any] = { + "id": job_id, + "status": "running", + "processed": 0, + "total": 0, + "current_file": "", + "stats": None, + "error": None, + } + _index_jobs[job_id] = job + + async def _run() -> None: + try: + result = await asyncio.to_thread( + _run_index_job, resolved_paths, config, resolved_db, job + ) + job["status"] = "complete" + job["stats"] = result + except Exception as exc: + LOGGER.exception("Indexing job %s failed: %s", job_id, exc) + job["status"] = "error" + job["error"] = str(exc) + + asyncio.create_task(_run()) + return {"status": "ok", "job_id": job_id} + + +@app.get("/index/status/{job_id}") +async def get_index_status(job_id: str) -> dict[str, Any]: + """Poll the status of a running or completed indexing job.""" + job = _index_jobs.get(job_id) + if job is None: + raise HTTPException(status_code=404, detail="Job not found") + return job diff --git a/src/docfinder/web/templates/index.html b/src/docfinder/web/templates/index.html index 34da2f6..3faf6b3 100644 --- a/src/docfinder/web/templates/index.html +++ b/src/docfinder/web/templates/index.html @@ -8,67 +8,86 @@ :root { --primary: #2563eb; --primary-hover: #1d4ed8; - --primary-light: rgba(37, 99, 235, 0.1); + --primary-light: rgba(37, 99, 235, 0.12); --danger: #dc2626; --danger-hover: #b91c1c; --danger-light: rgba(220, 38, 38, 0.1); --success: #16a34a; --success-light: rgba(22, 163, 74, 0.1); --warning: #d97706; - --bg: #f8fafc; + --bg: #f1f5f9; --bg-card: #ffffff; - --text: #1e293b; + --bg-input: #f8fafc; + --text: #0f172a; --text-muted: #64748b; - --border: rgba(148, 163, 184, 0.3); - --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -2px rgba(0, 0, 0, 0.1); - --shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -4px rgba(0, 0, 0, 0.1); + --text-subtle: #94a3b8; + --border: rgba(148, 163, 184, 0.25); + --border-focus: rgba(37, 99, 235, 0.5); + --shadow-xs: 0 1px 2px rgba(0,0,0,0.05); + --shadow: 0 1px 3px rgba(0,0,0,0.07), 0 4px 12px rgba(0,0,0,0.04); + --shadow-lg: 0 8px 24px rgba(0,0,0,0.10), 0 2px 6px rgba(0,0,0,0.06); + --shadow-xl: 0 20px 48px rgba(0,0,0,0.14); --radius: 12px; --radius-sm: 8px; + --radius-xs: 6px; + --header-h: 60px; color-scheme: light dark; } - + @media (prefers-color-scheme: dark) { :root { - --bg: #0f172a; - --bg-card: #1e293b; - --text: #f1f5f9; - --text-muted: #94a3b8; - --border: rgba(148, 163, 184, 0.2); - --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.3); - --shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.4); + --bg: #0c1120; + --bg-card: #151f32; + --bg-input: #1a2540; + --text: #e8edf5; + --text-muted: #8a9bbf; + --text-subtle: #536180; + --border: rgba(148, 163, 184, 0.12); + --border-focus: rgba(96, 165, 250, 0.5); + --shadow-xs: 0 1px 2px rgba(0,0,0,0.3); + --shadow: 0 1px 3px rgba(0,0,0,0.4), 0 4px 12px rgba(0,0,0,0.25); + --shadow-lg: 0 8px 24px rgba(0,0,0,0.45), 0 2px 6px rgba(0,0,0,0.3); + --shadow-xl: 0 20px 48px rgba(0,0,0,0.6); + --primary: #3b82f6; + --primary-hover: #2563eb; + --primary-light: rgba(59, 130, 246, 0.15); } } - * { - box-sizing: border-box; - } + * { box-sizing: border-box; margin: 0; padding: 0; } body { - margin: 0; - padding: 0; background: var(--bg); color: var(--text); - font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; - line-height: 1.6; - } - - .container { - max-width: 1200px; - margin: 0 auto; - padding: 0 1.5rem; + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif; + font-size: 15px; + line-height: 1.55; + -webkit-font-smoothing: antialiased; } + /* ── Header ─────────────────────────────────────────────────────────── */ header { - background: var(--bg-card); + height: var(--header-h); + background: rgba(255,255,255,0.75); + backdrop-filter: saturate(180%) blur(20px); + -webkit-backdrop-filter: saturate(180%) blur(20px); border-bottom: 1px solid var(--border); - padding: 1rem 0; position: sticky; top: 0; - z-index: 100; - box-shadow: var(--shadow); + z-index: 200; + } + + @media (prefers-color-scheme: dark) { + header { + background: rgba(21,31,50,0.8); + } } .header-content { + max-width: 1100px; + margin: 0 auto; + padding: 0 1.5rem; + height: 100%; display: flex; align-items: center; justify-content: space-between; @@ -78,79 +97,89 @@ .logo { display: flex; align-items: center; - gap: 0.75rem; + gap: 0.6rem; + text-decoration: none; + flex-shrink: 0; } .logo-icon { - width: 40px; - height: 40px; - background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); - border-radius: var(--radius-sm); + width: 34px; + height: 34px; + background: linear-gradient(135deg, #667eea, #764ba2); + border-radius: 9px; display: flex; align-items: center; justify-content: center; - padding: 4px; + box-shadow: 0 2px 8px rgba(118,75,162,0.35); } - h1 { - font-size: 1.5rem; + .logo h1 { + font-size: 1.15rem; font-weight: 700; - margin: 0; background: linear-gradient(135deg, var(--primary), #7c3aed); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; + letter-spacing: -0.02em; } + /* ── Tabs ─────────────────────────────────────────────────────────────── */ .tabs { display: flex; - gap: 0.5rem; + gap: 2px; background: var(--bg); - padding: 0.25rem; - border-radius: var(--radius); + border: 1px solid var(--border); + padding: 3px; + border-radius: calc(var(--radius-sm) + 3px); } .tab { - padding: 0.5rem 1rem; + padding: 0.4rem 1rem; border: none; background: transparent; color: var(--text-muted); cursor: pointer; border-radius: var(--radius-sm); + font-size: 0.875rem; font-weight: 500; - transition: all 0.2s; + transition: all 0.18s ease; + white-space: nowrap; + display: flex; + align-items: center; + gap: 0.35rem; } - .tab:hover { - color: var(--text); - background: var(--bg-card); - } + .tab:hover { color: var(--text); background: var(--bg-card); } .tab.active { background: var(--bg-card); color: var(--primary); - box-shadow: var(--shadow); + box-shadow: var(--shadow-xs); } + /* ── Main layout ──────────────────────────────────────────────────────── */ main { - padding: 2rem 0; + max-width: 1100px; + margin: 0 auto; + padding: 1.75rem 1.5rem 3rem; } - .section { - display: none; - } + .section { display: none; } + .section.active { display: block; animation: fadeUp 0.2s ease both; } - .section.active { - display: block; + @keyframes fadeUp { + from { opacity: 0; transform: translateY(6px); } + to { opacity: 1; transform: translateY(0); } } + /* ── Card ─────────────────────────────────────────────────────────────── */ .card { background: var(--bg-card); border-radius: var(--radius); - padding: 1.5rem; + padding: 1.4rem 1.5rem; box-shadow: var(--shadow); border: 1px solid var(--border); - margin-bottom: 1.5rem; + margin-bottom: 1.25rem; } .card-header { @@ -161,190 +190,220 @@ } .card-title { - font-size: 1.1rem; + font-size: 0.95rem; font-weight: 600; - margin: 0; + color: var(--text); display: flex; align-items: center; - gap: 0.5rem; + gap: 0.45rem; } - .card-title .icon { - font-size: 1.25rem; + /* ── Search ───────────────────────────────────────────────────────────── */ + .search-wrap { + position: relative; + display: flex; + align-items: center; } - .search-container { - position: relative; + .search-icon { + position: absolute; + left: 1rem; + color: var(--text-subtle); + font-size: 1.1rem; + pointer-events: none; + display: flex; } .search-input { width: 100%; - padding: 1rem 1.25rem 1rem 3rem; - border: 2px solid var(--border); + padding: 0.875rem 5.5rem 0.875rem 3rem; + border: 1.5px solid var(--border); border-radius: var(--radius); - font-size: 1.1rem; + font-size: 1.05rem; background: var(--bg-card); color: var(--text); - transition: all 0.2s; + transition: border-color 0.15s, box-shadow 0.15s; + outline: none; } + .search-input::placeholder { color: var(--text-subtle); } + .search-input:focus { - outline: none; border-color: var(--primary); box-shadow: 0 0 0 3px var(--primary-light); } - .search-icon { - position: absolute; - left: 1rem; - top: 50%; - transform: translateY(-50%); - font-size: 1.25rem; - color: var(--text-muted); - } - .search-btn { position: absolute; right: 0.5rem; - top: 50%; - transform: translateY(-50%); - } - - .form-row { - display: flex; - gap: 0.75rem; - flex-wrap: wrap; - } - - .form-input { - flex: 1; - min-width: 200px; - padding: 0.75rem 1rem; - border: 1px solid var(--border); - border-radius: var(--radius-sm); - font-size: 1rem; - background: var(--bg); - color: var(--text); - transition: all 0.2s; + padding: 0.5rem 1rem; + font-size: 0.875rem; } - .form-input:focus { - outline: none; - border-color: var(--primary); - box-shadow: 0 0 0 3px var(--primary-light); + /* Results count */ + .results-meta { + font-size: 0.85rem; + color: var(--text-muted); + margin-bottom: 1rem; + padding: 0 0.25rem; } + /* ── Buttons ──────────────────────────────────────────────────────────── */ .btn { display: inline-flex; align-items: center; - gap: 0.5rem; - padding: 0.75rem 1.25rem; + gap: 0.4rem; + padding: 0.6rem 1.1rem; border: none; border-radius: var(--radius-sm); - font-size: 0.95rem; + font-size: 0.875rem; font-weight: 600; cursor: pointer; - transition: all 0.2s; + transition: all 0.15s ease; white-space: nowrap; + text-decoration: none; } .btn-primary { - background: linear-gradient(135deg, var(--primary), var(--primary-hover)); - color: white; + background: linear-gradient(135deg, var(--primary) 0%, var(--primary-hover) 100%); + color: #fff; + box-shadow: 0 1px 3px rgba(37,99,235,0.3); } - .btn-primary:hover:not(.search-btn) { + .btn-primary:hover:not(:disabled) { + filter: brightness(1.08); + box-shadow: 0 3px 10px rgba(37,99,235,0.4); transform: translateY(-1px); - box-shadow: 0 4px 12px rgba(37, 99, 235, 0.4); - } - - .search-btn:hover { - box-shadow: 0 4px 12px rgba(37, 99, 235, 0.4); - filter: brightness(1.1); } .btn-secondary { background: transparent; color: var(--primary); - border: 1px solid var(--primary); + border: 1.5px solid var(--border); } - .btn-secondary:hover { - background: var(--primary-light); - } + .btn-secondary:hover:not(:disabled) { background: var(--primary-light); border-color: var(--primary); } - .btn-danger { - background: var(--danger); - color: white; - } - - .btn-danger:hover { - background: var(--danger-hover); - } + .btn-danger { background: var(--danger); color: #fff; } + .btn-danger:hover:not(:disabled) { background: var(--danger-hover); } .btn-ghost { background: transparent; color: var(--text-muted); - padding: 0.5rem; + padding: 0.4rem 0.6rem; } - .btn-ghost:hover { - color: var(--danger); - background: var(--danger-light); + .btn-ghost:hover:not(:disabled) { color: var(--danger); background: var(--danger-light); } + + .btn-sm { padding: 0.4rem 0.75rem; font-size: 0.8rem; } + + .btn:disabled { opacity: 0.5; cursor: not-allowed; transform: none !important; filter: none !important; } + + /* ── Forms ────────────────────────────────────────────────────────────── */ + .form-row { + display: flex; + gap: 0.75rem; + flex-wrap: wrap; } - .btn-sm { - padding: 0.5rem 0.75rem; - font-size: 0.85rem; + .form-input { + flex: 1; + min-width: 220px; + padding: 0.65rem 0.9rem; + border: 1.5px solid var(--border); + border-radius: var(--radius-sm); + font-size: 0.95rem; + background: var(--bg-input); + color: var(--text); + transition: border-color 0.15s, box-shadow 0.15s; + outline: none; } - .btn:disabled { - opacity: 0.6; - cursor: not-allowed; - transform: none !important; + .form-input::placeholder { color: var(--text-subtle); } + + .form-input:focus { + border-color: var(--primary); + box-shadow: 0 0 0 3px var(--primary-light); } - .status-badge { + .form-input.drag-over { + border-color: var(--primary); + background: var(--primary-light); + box-shadow: 0 0 0 3px var(--primary-light); + } + + /* ── Status badges ────────────────────────────────────────────────────── */ + .badge { display: inline-flex; align-items: center; - gap: 0.35rem; - padding: 0.35rem 0.75rem; + gap: 0.3rem; + padding: 0.3rem 0.7rem; border-radius: 9999px; + font-size: 0.8rem; + font-weight: 600; + } + + .badge-success { background: var(--success-light); color: var(--success); } + .badge-error { background: var(--danger-light); color: var(--danger); } + .badge-info { background: var(--primary-light); color: var(--primary); } + + /* ── Progress bar ─────────────────────────────────────────────────────── */ + .progress-wrap { + display: flex; + flex-direction: column; + gap: 0.6rem; + padding: 1rem 0 0.25rem; + } + + .progress-header { + display: flex; + justify-content: space-between; + align-items: center; font-size: 0.85rem; - font-weight: 500; } - .status-success { - background: var(--success-light); - color: var(--success); + .progress-label { font-weight: 500; } + .progress-count { color: var(--text-muted); } + + .progress-bar-bg { + height: 6px; + background: var(--border); + border-radius: 9999px; + overflow: hidden; } - .status-error { - background: var(--danger-light); - color: var(--danger); + .progress-bar-fill { + height: 100%; + background: linear-gradient(90deg, var(--primary), #7c3aed); + border-radius: 9999px; + transition: width 0.4s ease; + min-width: 4px; } - .status-info { - background: var(--primary-light); - color: var(--primary); + .progress-file { + font-size: 0.78rem; + color: var(--text-muted); + font-family: "SF Mono", "Monaco", "Roboto Mono", monospace; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; } + /* ── Results ──────────────────────────────────────────────────────────── */ .results-list { list-style: none; - padding: 0; - margin: 0; display: flex; flex-direction: column; - gap: 1rem; + gap: 0.85rem; } .result-card { background: var(--bg-card); border-radius: var(--radius); - padding: 1.25rem; + padding: 1.15rem 1.25rem; box-shadow: var(--shadow); border: 1px solid var(--border); - transition: all 0.2s; + transition: box-shadow 0.18s, transform 0.18s; } .result-card:hover { @@ -357,40 +416,39 @@ align-items: flex-start; justify-content: space-between; gap: 1rem; - margin-bottom: 0.75rem; + margin-bottom: 0.5rem; } .result-title { - font-size: 1.1rem; + font-size: 1rem; font-weight: 600; - margin: 0; color: var(--text); } .result-score { background: linear-gradient(135deg, var(--primary), #7c3aed); - color: white; - padding: 0.25rem 0.75rem; + color: #fff; + padding: 0.2rem 0.65rem; border-radius: 9999px; - font-size: 0.85rem; - font-weight: 600; - white-space: nowrap; + font-size: 0.78rem; + font-weight: 700; + flex-shrink: 0; } .result-path { - font-family: "SF Mono", "Monaco", "Inconsolata", "Roboto Mono", monospace; - font-size: 0.8rem; + font-family: "SF Mono", "Monaco", "Roboto Mono", monospace; + font-size: 0.75rem; color: var(--text-muted); - margin-bottom: 0.75rem; + margin-bottom: 0.6rem; word-break: break-all; } .result-snippet { - font-size: 0.95rem; + font-size: 0.9rem; color: var(--text); line-height: 1.6; background: var(--bg); - padding: 0.75rem 1rem; + padding: 0.65rem 0.9rem; border-radius: var(--radius-sm); border-left: 3px solid var(--primary); } @@ -398,58 +456,56 @@ .result-actions { display: flex; gap: 0.5rem; - margin-top: 1rem; + margin-top: 0.85rem; } + /* ── Documents table ──────────────────────────────────────────────────── */ .documents-table { width: 100%; border-collapse: collapse; + font-size: 0.875rem; } .documents-table th, .documents-table td { - padding: 0.75rem 1rem; + padding: 0.65rem 0.9rem; text-align: left; border-bottom: 1px solid var(--border); } .documents-table th { font-weight: 600; - color: var(--text-muted); - font-size: 0.85rem; + color: var(--text-subtle); + font-size: 0.75rem; text-transform: uppercase; - letter-spacing: 0.05em; - } - - .documents-table tr:hover td { + letter-spacing: 0.06em; background: var(--bg); } - .doc-title { - font-weight: 500; - color: var(--text); - } + .documents-table tr:last-child td { border-bottom: none; } + + .documents-table tbody tr:hover td { background: var(--bg); } + + .doc-title { font-weight: 500; } .doc-path { - font-family: monospace; - font-size: 0.8rem; + font-family: "SF Mono", "Monaco", monospace; + font-size: 0.75rem; color: var(--text-muted); - max-width: 300px; + max-width: 260px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; } - .doc-meta { - font-size: 0.85rem; - color: var(--text-muted); - } + .doc-meta { color: var(--text-muted); } + /* ── Stats ────────────────────────────────────────────────────────────── */ .stats-grid { display: grid; - grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); - gap: 1rem; - margin-bottom: 1.5rem; + grid-template-columns: repeat(auto-fit, minmax(130px, 1fr)); + gap: 0.85rem; + margin-bottom: 1.25rem; } .stat-card { @@ -457,301 +513,316 @@ border-radius: var(--radius-sm); padding: 1rem; text-align: center; + border: 1px solid var(--border); } .stat-value { - font-size: 1.75rem; + font-size: 1.6rem; font-weight: 700; color: var(--primary); + letter-spacing: -0.02em; } .stat-label { - font-size: 0.85rem; + font-size: 0.78rem; color: var(--text-muted); - margin-top: 0.25rem; + margin-top: 0.2rem; + text-transform: uppercase; + letter-spacing: 0.05em; } + /* ── Empty / loading states ───────────────────────────────────────────── */ .empty-state { text-align: center; - padding: 3rem; + padding: 3.5rem 1rem; color: var(--text-muted); } - .empty-state .icon { - font-size: 3rem; - margin-bottom: 1rem; - opacity: 0.5; - } + .empty-state .icon { font-size: 2.5rem; margin-bottom: 0.75rem; opacity: 0.45; } .loading { display: flex; align-items: center; justify-content: center; - gap: 0.75rem; - padding: 2rem; + gap: 0.65rem; + padding: 2.5rem; color: var(--text-muted); + font-size: 0.9rem; } .spinner { - width: 24px; - height: 24px; - border: 3px solid var(--border); + width: 20px; + height: 20px; + border: 2.5px solid var(--border); border-top-color: var(--primary); border-radius: 50%; - animation: spin 1s linear infinite; + animation: spin 0.8s linear infinite; + flex-shrink: 0; } - @keyframes spin { - to { transform: rotate(360deg); } - } + @keyframes spin { to { transform: rotate(360deg); } } + /* ── Toast ────────────────────────────────────────────────────────────── */ .toast { position: fixed; - bottom: 2rem; - right: 2rem; - padding: 1rem 1.5rem; + bottom: 1.75rem; + right: 1.75rem; + padding: 0.8rem 1.25rem; border-radius: var(--radius); - box-shadow: var(--shadow-lg); - z-index: 1000; - animation: slideIn 0.3s ease-out; + box-shadow: var(--shadow-xl); + z-index: 9999; + font-size: 0.875rem; + font-weight: 500; + animation: toastIn 0.25s cubic-bezier(0.34,1.56,0.64,1) both; + max-width: 320px; } - .toast.success { - background: var(--success); - color: white; - } + .toast.success { background: #166534; color: #dcfce7; } + .toast.error { background: #7f1d1d; color: #fee2e2; } + .toast.info { background: #1e3a5f; color: #dbeafe; } - .toast.error { - background: var(--danger); - color: white; + @media (prefers-color-scheme: dark) { + .toast.success { background: #14532d; } + .toast.error { background: #7f1d1d; } } - @keyframes slideIn { - from { - transform: translateX(100%); - opacity: 0; - } - to { - transform: translateX(0); - opacity: 1; - } + @keyframes toastIn { + from { opacity: 0; transform: translateY(12px) scale(0.95); } + to { opacity: 1; transform: translateY(0) scale(1); } } + /* ── Modal ────────────────────────────────────────────────────────────── */ .modal-overlay { position: fixed; inset: 0; - background: rgba(0, 0, 0, 0.5); + background: rgba(0,0,0,0.45); + backdrop-filter: blur(4px); display: flex; align-items: center; justify-content: center; z-index: 1000; opacity: 0; visibility: hidden; - transition: all 0.2s; + transition: opacity 0.2s, visibility 0.2s; } - .modal-overlay.active { - opacity: 1; - visibility: visible; - } + .modal-overlay.active { opacity: 1; visibility: visible; } .modal { background: var(--bg-card); border-radius: var(--radius); padding: 1.5rem; - max-width: 400px; - width: 90%; - box-shadow: var(--shadow-lg); - transform: scale(0.9); - transition: transform 0.2s; + max-width: 380px; + width: 92%; + box-shadow: var(--shadow-xl); + border: 1px solid var(--border); + transform: scale(0.92); + transition: transform 0.2s cubic-bezier(0.34,1.56,0.64,1); } - .modal-overlay.active .modal { - transform: scale(1); - } + .modal-overlay.active .modal { transform: scale(1); } - .modal-title { - font-size: 1.1rem; - font-weight: 600; - margin: 0 0 0.75rem; - } + .modal-title { font-size: 1.05rem; font-weight: 600; margin-bottom: 0.6rem; } + .modal-text { color: var(--text-muted); font-size: 0.9rem; margin-bottom: 1.25rem; } - .modal-text { - color: var(--text-muted); - margin-bottom: 1.5rem; - } + .modal-actions { display: flex; gap: 0.6rem; justify-content: flex-end; } - .modal-actions { + /* ── Bulk actions ─────────────────────────────────────────────────────── */ + .select-all-bar { display: flex; - gap: 0.75rem; - justify-content: flex-end; + align-items: center; + justify-content: space-between; + padding: 0.6rem 0.75rem; + background: var(--bg); + border-radius: var(--radius-sm); + margin-bottom: 0.85rem; + border: 1px solid var(--border); } .checkbox-row { display: flex; align-items: center; + gap: 0.4rem; + font-size: 0.875rem; + cursor: pointer; + } + + /* ── Tips list ────────────────────────────────────────────────────────── */ + .tips-list { + list-style: none; + display: flex; + flex-direction: column; gap: 0.5rem; - padding: 0.5rem 0; + color: var(--text-muted); + font-size: 0.875rem; } - .select-all-container { + .tips-list li { display: flex; - align-items: center; - justify-content: space-between; - padding: 0.75rem 1rem; - background: var(--bg); - border-radius: var(--radius-sm); - margin-bottom: 1rem; + gap: 0.5rem; } - @media (max-width: 768px) { - .header-content { - flex-direction: column; - align-items: stretch; - } + .tips-list li::before { content: "→"; color: var(--primary); flex-shrink: 0; } - .tabs { - justify-content: center; - } + /* ── Stats row ────────────────────────────────────────────────────────── */ + .index-stats-row { + display: flex; + gap: 1.5rem; + flex-wrap: wrap; + margin-top: 0.85rem; + } - .documents-table { - display: block; - overflow-x: auto; - } + .stat-item { + display: flex; + align-items: center; + gap: 0.35rem; + font-size: 0.875rem; + color: var(--text-muted); + } - .form-row { - flex-direction: column; - } + .stat-item strong { color: var(--text); } - .result-header { - flex-direction: column; - gap: 0.5rem; - } + /* ── Responsive ───────────────────────────────────────────────────────── */ + @media (max-width: 640px) { + .header-content { flex-wrap: wrap; height: auto; padding: 0.6rem 1rem; gap: 0.5rem; } + header { height: auto; } + .tabs { width: 100%; } + .tab { flex: 1; justify-content: center; } + .form-row { flex-direction: column; } + .result-header { flex-direction: column; gap: 0.4rem; } + .documents-table { display: block; overflow-x: auto; } }
-
+
-
- - - -
+
-
- -
+
+ +
-
- 🔍 - - + +
+ + + + + +
-
-
    -
    + +
      - -
      + +
      -

      📁 Index Documents

      +

      + + Index Documents +

      -
      -
      -

      💡 Tips

      -
      -
        -
      • Use absolute paths (e.g. /Users/name/Documents)
      • -
      • You can index entire folders or individual PDF files
      • -
      • Already indexed documents are automatically updated if modified
      • -
      • Unchanged files are skipped to speed up the process
      • +

        + + Tips +

        +
          +
        • Use absolute paths — e.g. /Users/name/Documents
        • +
        • Index entire folders: all PDFs are discovered recursively
        • +
        • Drag & drop a folder from Finder onto the input field
        • +
        • Already-indexed documents are automatically updated if modified
        • +
        • Unchanged files are skipped — re-indexing is fast
      - -
      + +
      -

      📚 Indexed Documents

      -
      +

      + + Indexed Library +

      +
      - +
      -
      - -
      + + +