Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude-plugin/marketplace.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"name": "qi",
"source": "./",
"description": "Local knowledge search CLI — index documents and search them using BM25 full-text search, vector embeddings, and LLM-powered Q&A, all running locally with no external dependencies.",
"version": "0.3.0",
"version": "0.4.0",
Comment thread
itsmostafa marked this conversation as resolved.
"author": {
"name": "itsmostafa"
},
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
<img src="assets/img/qi-logo.png" alt="qi logo" width="200" />
</p>

A local-first knowledge search CLI for macOS and Linux. Index and search anything — codebases, documentation, research papers, notes, wikis, datasets, logs, contracts, books — using BM25 full-text search, vector embeddings, and LLM-powered Q&A. Choose your own models via Ollama, LM Studio, llama.cpp, MLX or using OpenAI's cloud models.
qi is an ultra-fast knowledge search CLI for the files on your local machine. It has no dependencies and no runtime — just a single executable that indexes code, docs, notes, papers, logs, and other text into SQLite, then gives you BM25 search, optional vector search, and grounded LLM Q&A with citations. Use it offline with Ollama, LM Studio, llama.cpp, or MLX, or connect OpenAI for cloud models.

Save tokens by delegating some of your AI agent's work to qi.

## Features

Expand Down Expand Up @@ -105,7 +107,6 @@ qi doctor
| `qi delete <collection>` | Delete a named collection and all its indexed data |
| `qi stats` | Show index statistics |
| `qi doctor` | Health check |
| `qi version` | Print version |

## Search Modes

Expand Down
38 changes: 38 additions & 0 deletions internal/db/migrations/003_cascade_chunk_refs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
-- Add ON DELETE CASCADE to chunk_vectors and embeddings so reindexing a
-- changed document (which deletes its chunks) does not fail or orphan rows.
-- SQLite requires a table rebuild to change foreign key actions.

Comment thread
itsmostafa marked this conversation as resolved.
-- Disable FK enforcement so the parent tables can be dropped and renamed
-- mid-rebuild without constraint errors.
-- NOTE(review): PRAGMA foreign_keys is a no-op while a transaction is open;
-- confirm the migration runner executes this file outside a transaction.
PRAGMA foreign_keys=OFF;

-- Rebuild chunk_vectors with ON DELETE CASCADE on chunk_id.
-- (Dropped first in case a previous failed run left the scratch table behind.)
DROP TABLE IF EXISTS chunk_vectors_new;
CREATE TABLE chunk_vectors_new (
chunk_id INTEGER PRIMARY KEY REFERENCES chunks(id) ON DELETE CASCADE,
vector BLOB NOT NULL
);
INSERT INTO chunk_vectors_new(chunk_id, vector)
SELECT chunk_id, vector FROM chunk_vectors;
DROP TABLE chunk_vectors;
ALTER TABLE chunk_vectors_new RENAME TO chunk_vectors;

-- Rebuild embeddings the same way; chunk_vectors must already be rebuilt
-- above because the backfill below joins against it.
DROP TABLE IF EXISTS embeddings_new;
CREATE TABLE embeddings_new (
chunk_id INTEGER PRIMARY KEY REFERENCES chunks(id) ON DELETE CASCADE,
provider TEXT NOT NULL,
model TEXT NOT NULL,
dimension INTEGER NOT NULL DEFAULT 0,
embedded_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Derive dimension from vector length (float32 = 4 bytes each) so that both
-- old schemas (no dimension column) and new ones are handled correctly.
-- LEFT JOIN + COALESCE: rows with no stored vector fall back to dimension 0.
INSERT INTO embeddings_new(chunk_id, provider, model, dimension, embedded_at)
SELECT e.chunk_id, e.provider, e.model,
COALESCE(length(cv.vector)/4, 0),
e.embedded_at
FROM embeddings e
LEFT JOIN chunk_vectors cv ON cv.chunk_id = e.chunk_id;
DROP TABLE embeddings;
ALTER TABLE embeddings_new RENAME TO embeddings;

-- Re-enable FK enforcement now that both rebuilds are complete.
PRAGMA foreign_keys=ON;

-- Record this migration; OR IGNORE keeps the script idempotent on re-run.
INSERT OR IGNORE INTO schema_version(version) VALUES (3);
37 changes: 29 additions & 8 deletions internal/indexer/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"crypto/sha256"
"database/sql"
"encoding/hex"
"errors"
"fmt"
"io/fs"
"log/slog"
Expand Down Expand Up @@ -95,7 +96,8 @@ func (idx *Indexer) Index(ctx context.Context, col config.Collection) (Stats, er
return err
}
if d.IsDir() {
if defaultIgnoreDirs[d.Name()] || ignoreSet[d.Name()] {
name := d.Name()
if defaultIgnoreDirs[name] || ignoreSet[name] || (strings.HasPrefix(name, ".") && name != ".") {
Comment thread
itsmostafa marked this conversation as resolved.
return filepath.SkipDir
}
return nil
Expand Down Expand Up @@ -145,18 +147,33 @@ func (idx *Indexer) indexFile(ctx context.Context, col config.Collection, relPat

hash := sha256sum(data)

// Check if document exists and unchanged
// Check if document exists (active or deactivated) and whether content changed
var existingHash string
var docID int64
var existingActive int
row := idx.db.QueryRowContext(ctx,
`SELECT id, content_hash FROM documents WHERE collection=? AND path=? AND active=1`,
`SELECT id, content_hash, active FROM documents WHERE collection=? AND path=?`,
col.Name, relPath)
_ = row.Scan(&docID, &existingHash)
if err := row.Scan(&docID, &existingHash, &existingActive); err != nil && !errors.Is(err, sql.ErrNoRows) {
return fmt.Errorf("looking up existing document: %w", err)
}

if existingHash == hash {
if existingActive == 1 && existingHash == hash {
return nil // unchanged
}

Comment thread
itsmostafa marked this conversation as resolved.
// Fast-path: previously deactivated document restored with byte-identical content.
// Reactivate the row without touching chunks or embeddings — deleting chunks would
// cascade into chunk_vectors/embeddings (migration 003) and force pointless re-embedding.
if docID != 0 && existingActive == 0 && existingHash == hash {
if _, err := idx.db.ExecContext(ctx,
`UPDATE documents SET active=1, updated_at=datetime('now') WHERE id=?`, docID); err != nil {
return fmt.Errorf("reactivating document: %w", err)
}
stats.FilesAdded++
return nil
}

// Upsert content
if _, err := idx.db.ExecContext(ctx,
`INSERT OR IGNORE INTO content(hash, body) VALUES (?, ?)`,
Expand Down Expand Up @@ -198,9 +215,9 @@ func (idx *Indexer) indexFile(ctx context.Context, col config.Collection, relPat
newDocID, _ = res.LastInsertId()
stats.FilesAdded++
} else {
// Update
// Update (or reactivate a previously deactivated document)
_, err = tx.ExecContext(ctx,
`UPDATE documents SET title=?, content_hash=?, updated_at=datetime('now') WHERE id=?`,
`UPDATE documents SET title=?, content_hash=?, active=1, updated_at=datetime('now') WHERE id=?`,
title, hash, docID)
if err != nil {
return fmt.Errorf("updating document: %w", err)
Expand All @@ -210,7 +227,11 @@ func (idx *Indexer) indexFile(ctx context.Context, col config.Collection, relPat
if _, err := tx.ExecContext(ctx, `DELETE FROM chunks WHERE doc_id=?`, docID); err != nil {
return fmt.Errorf("deleting old chunks: %w", err)
}
stats.FilesUpdated++
if existingActive == 0 {
stats.FilesAdded++
} else {
stats.FilesUpdated++
}
}

// Insert chunks
Expand Down
233 changes: 233 additions & 0 deletions internal/indexer/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,239 @@ func TestIndexer_IncrementalUpdate(t *testing.T) {
}
}

// TestIndexer_ReindexWithEmbeddings verifies that reindexing a modified file
// whose chunks already carry vectors and embedding metadata succeeds and
// leaves no orphaned rows in chunk_vectors or embeddings (the cascade path
// introduced by migration 003).
func TestIndexer_ReindexWithEmbeddings(t *testing.T) {
	ctx := context.Background()
	db := openTestDB(t)
	ix := New(db, 256)
	tmp := t.TempDir()
	docPath := filepath.Join(tmp, "doc.md")
	coll := config.Collection{Name: "test", Path: tmp, Extensions: []string{".md"}}

	if err := os.WriteFile(docPath, []byte("# Original\nOriginal content."), 0o640); err != nil {
		t.Fatal(err)
	}
	if _, err := ix.Index(ctx, coll); err != nil {
		t.Fatal(err)
	}

	// Attach a fake vector and embedding record to one chunk of the document.
	var chunkID int64
	if err := db.QueryRowContext(ctx,
		`SELECT c.id FROM chunks c JOIN documents d ON d.id=c.doc_id
		WHERE d.collection='test' AND d.path='doc.md' LIMIT 1`).Scan(&chunkID); err != nil {
		t.Fatalf("finding chunk: %v", err)
	}
	if err := db.InsertEmbedding(ctx, chunkID, []float32{0.1, 0.2, 0.3, 0.4}); err != nil {
		t.Fatalf("inserting chunk_vector: %v", err)
	}
	if _, err := db.ExecContext(ctx,
		`INSERT INTO embeddings(chunk_id, provider, model, dimension) VALUES (?, 'test', 'test-model', 4)`,
		chunkID); err != nil {
		t.Fatalf("inserting embeddings row: %v", err)
	}

	// Rewrite the file so its content hash changes, then reindex; the reindex
	// must succeed and report the file as updated.
	if err := os.WriteFile(docPath, []byte("# Updated\nUpdated content."), 0o640); err != nil {
		t.Fatal(err)
	}
	stats, err := ix.Index(ctx, coll)
	if err != nil {
		t.Fatalf("reindex failed: %v", err)
	}
	if stats.FilesUpdated != 1 {
		t.Errorf("expected 1 updated, got %d", stats.FilesUpdated)
	}

	// Neither table may retain rows pointing at deleted chunks.
	var orphanVectors int
	if err := db.QueryRowContext(ctx,
		`SELECT COUNT(*) FROM chunk_vectors WHERE chunk_id NOT IN (SELECT id FROM chunks)`).Scan(&orphanVectors); err != nil {
		t.Fatalf("querying orphan chunk_vectors: %v", err)
	}
	if orphanVectors != 0 {
		t.Errorf("expected 0 orphan chunk_vectors rows, got %d", orphanVectors)
	}

	var orphanEmbeddings int
	if err := db.QueryRowContext(ctx,
		`SELECT COUNT(*) FROM embeddings WHERE chunk_id NOT IN (SELECT id FROM chunks)`).Scan(&orphanEmbeddings); err != nil {
		t.Fatalf("querying orphan embeddings: %v", err)
	}
	if orphanEmbeddings != 0 {
		t.Errorf("expected 0 orphan embeddings rows, got %d", orphanEmbeddings)
	}
}

// TestIndexer_ReindexAfterDeletion verifies the delete → restore lifecycle:
// removing a file deactivates its document, and re-creating the file with
// different content re-adds it as an active document with a recorded content
// hash and fresh chunks.
func TestIndexer_ReindexAfterDeletion(t *testing.T) {
	database := openTestDB(t)
	idx := New(database, 256)
	dir := t.TempDir()
	path := filepath.Join(dir, "doc.md")
	col := config.Collection{Name: "test", Path: dir, Extensions: []string{".md"}}

	// Index the file
	if err := os.WriteFile(path, []byte("# Original\nOriginal content."), 0o640); err != nil {
		t.Fatal(err)
	}
	if _, err := idx.Index(context.Background(), col); err != nil {
		t.Fatal(err)
	}

	// Delete the file → deactivates the document
	if err := os.Remove(path); err != nil {
		t.Fatal(err)
	}
	stats, err := idx.Index(context.Background(), col)
	if err != nil {
		t.Fatal(err)
	}
	if stats.FilesRemoved != 1 {
		t.Fatalf("expected 1 removed, got %d", stats.FilesRemoved)
	}

	// Restore the file with new content; the indexer must count it as added.
	if err := os.WriteFile(path, []byte("# Restored\nRestored content."), 0o640); err != nil {
		t.Fatal(err)
	}
	stats, err = idx.Index(context.Background(), col)
	if err != nil {
		t.Fatalf("index after restore: %v", err)
	}
	if stats.FilesAdded != 1 {
		t.Errorf("expected 1 added after restore, got %d", stats.FilesAdded)
	}

	// Verify the document is active with new hash and has chunks
	var active int
	var hash string
	row := database.QueryRowContext(context.Background(),
		`SELECT active, content_hash FROM documents WHERE collection='test' AND path='doc.md'`)
	if err := row.Scan(&active, &hash); err != nil {
		t.Fatalf("querying restored document: %v", err)
	}
	if active != 1 {
		t.Errorf("expected active=1, got %d", active)
	}
	// Fix: hash was previously scanned but never asserted; make sure a
	// content hash was actually recorded for the restored document.
	if hash == "" {
		t.Error("expected non-empty content_hash after restore")
	}

	var chunkCount int
	cRow := database.QueryRowContext(context.Background(),
		`SELECT COUNT(*) FROM chunks c JOIN documents d ON d.id=c.doc_id WHERE d.collection='test' AND d.path='doc.md' AND d.active=1`)
	if err := cRow.Scan(&chunkCount); err != nil {
		t.Fatalf("querying chunks: %v", err)
	}
	if chunkCount == 0 {
		t.Error("expected at least one chunk after restore")
	}
}

// TestIndexer_ReactivateSameContent verifies the reactivation fast path:
// deleting a file and restoring byte-identical content must flip the document
// back to active WITHOUT deleting its chunks — which would cascade into
// chunk_vectors/embeddings and force pointless re-embedding.
func TestIndexer_ReactivateSameContent(t *testing.T) {
	database := openTestDB(t)
	idx := New(database, 256)
	dir := t.TempDir()
	path := filepath.Join(dir, "doc.md")
	col := config.Collection{Name: "test", Path: dir, Extensions: []string{".md"}}

	body := []byte("# Original\nOriginal content.")
	if err := os.WriteFile(path, body, 0o640); err != nil {
		t.Fatal(err)
	}
	if _, err := idx.Index(context.Background(), col); err != nil {
		t.Fatal(err)
	}

	// Capture chunk IDs and seed an embedding to detect spurious deletion.
	rows, err := database.QueryContext(context.Background(),
		`SELECT c.id FROM chunks c JOIN documents d ON d.id=c.doc_id
		WHERE d.collection='test' AND d.path='doc.md' ORDER BY c.id`)
	if err != nil {
		t.Fatalf("listing chunks: %v", err)
	}
	var originalChunkIDs []int64
	for rows.Next() {
		var id int64
		if err := rows.Scan(&id); err != nil {
			t.Fatal(err)
		}
		originalChunkIDs = append(originalChunkIDs, id)
	}
	rows.Close()
	// Fix: an error during iteration previously went unchecked and could
	// silently truncate the chunk-ID list; rows.Err is valid after Close.
	if err := rows.Err(); err != nil {
		t.Fatalf("iterating chunks: %v", err)
	}
	if len(originalChunkIDs) == 0 {
		t.Fatal("expected at least one chunk after initial index")
	}

	// Seed one chunk with a vector and an embeddings row.
	seedID := originalChunkIDs[0]
	if err := database.InsertEmbedding(context.Background(), seedID, []float32{0.1, 0.2, 0.3, 0.4}); err != nil {
		t.Fatalf("inserting chunk_vector: %v", err)
	}
	if _, err := database.ExecContext(context.Background(),
		`INSERT INTO embeddings(chunk_id, provider, model, dimension) VALUES (?, 'test', 'test-model', 4)`,
		seedID); err != nil {
		t.Fatalf("inserting embeddings row: %v", err)
	}

	// Delete the file → deactivates the document.
	if err := os.Remove(path); err != nil {
		t.Fatal(err)
	}
	if _, err := idx.Index(context.Background(), col); err != nil {
		t.Fatal(err)
	}

	// Restore byte-identical content; the reactivation path must count it as added.
	if err := os.WriteFile(path, body, 0o640); err != nil {
		t.Fatal(err)
	}
	stats, err := idx.Index(context.Background(), col)
	if err != nil {
		t.Fatalf("index after restore: %v", err)
	}
	if stats.FilesAdded != 1 {
		t.Errorf("expected 1 added, got %d", stats.FilesAdded)
	}

	// Document must be active again.
	var active int
	if err := database.QueryRowContext(context.Background(),
		`SELECT active FROM documents WHERE collection='test' AND path='doc.md'`).
		Scan(&active); err != nil {
		t.Fatalf("querying restored document: %v", err)
	}
	if active != 1 {
		t.Errorf("expected active=1, got %d", active)
	}

	// Chunk ID must be preserved — proves DELETE FROM chunks did not run.
	var preservedCount int
	if err := database.QueryRowContext(context.Background(),
		`SELECT COUNT(*) FROM chunks WHERE id = ?`, seedID).Scan(&preservedCount); err != nil {
		t.Fatalf("querying preserved chunk: %v", err)
	}
	if preservedCount != 1 {
		t.Fatalf("expected seed chunk %d to survive restore, got count %d", seedID, preservedCount)
	}

	// Embedding and vector for the seed chunk must still exist.
	var embCount int
	if err := database.QueryRowContext(context.Background(),
		`SELECT COUNT(*) FROM embeddings WHERE chunk_id = ?`, seedID).Scan(&embCount); err != nil {
		t.Fatalf("querying embedding: %v", err)
	}
	if embCount != 1 {
		t.Errorf("expected embedding for chunk %d to survive restore, got %d", seedID, embCount)
	}

	var vecCount int
	if err := database.QueryRowContext(context.Background(),
		`SELECT COUNT(*) FROM chunk_vectors WHERE chunk_id = ?`, seedID).Scan(&vecCount); err != nil {
		t.Fatalf("querying chunk_vector: %v", err)
	}
	if vecCount != 1 {
		t.Errorf("expected chunk_vector for chunk %d to survive restore, got %d", seedID, vecCount)
	}
}

func TestIndexer_DeactivatesMissingFiles(t *testing.T) {
database := openTestDB(t)
idx := New(database, 256)
Expand Down
Loading