From aa4b02d535d782c2664a17942d02ef7f29455d29 Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Fri, 1 May 2026 23:38:23 -0600
Subject: [PATCH 01/16] fix(indexer): reactivate deactivated documents when
file is restored
Previously, indexFile only searched for active=1 documents. When a
deactivated row existed for the same (collection, path), the subsequent
INSERT violated the UNIQUE(collection, path) constraint and the file
silently stayed unindexed.
Fix: drop the active=1 filter from the existence lookup and include the
active column. A deactivated row is now reactivated via the existing
update branch (which already regenerates chunks), and its stats are
counted as FilesAdded since the file was not searchable before.
---
internal/indexer/indexer.go | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/internal/indexer/indexer.go b/internal/indexer/indexer.go
index 3e43b75..98085e7 100644
--- a/internal/indexer/indexer.go
+++ b/internal/indexer/indexer.go
@@ -145,15 +145,16 @@ func (idx *Indexer) indexFile(ctx context.Context, col config.Collection, relPat
hash := sha256sum(data)
- // Check if document exists and unchanged
+ // Check if document exists (active or deactivated) and whether content changed
var existingHash string
var docID int64
+ var existingActive int
row := idx.db.QueryRowContext(ctx,
- `SELECT id, content_hash FROM documents WHERE collection=? AND path=? AND active=1`,
+ `SELECT id, content_hash, active FROM documents WHERE collection=? AND path=?`,
col.Name, relPath)
- _ = row.Scan(&docID, &existingHash)
+ _ = row.Scan(&docID, &existingHash, &existingActive)
- if existingHash == hash {
+ if existingActive == 1 && existingHash == hash {
return nil // unchanged
}
@@ -198,9 +199,9 @@ func (idx *Indexer) indexFile(ctx context.Context, col config.Collection, relPat
newDocID, _ = res.LastInsertId()
stats.FilesAdded++
} else {
- // Update
+ // Update (or reactivate a previously deactivated document)
_, err = tx.ExecContext(ctx,
- `UPDATE documents SET title=?, content_hash=?, updated_at=datetime('now') WHERE id=?`,
+ `UPDATE documents SET title=?, content_hash=?, active=1, updated_at=datetime('now') WHERE id=?`,
title, hash, docID)
if err != nil {
return fmt.Errorf("updating document: %w", err)
@@ -210,7 +211,11 @@ func (idx *Indexer) indexFile(ctx context.Context, col config.Collection, relPat
if _, err := tx.ExecContext(ctx, `DELETE FROM chunks WHERE doc_id=?`, docID); err != nil {
return fmt.Errorf("deleting old chunks: %w", err)
}
- stats.FilesUpdated++
+ if existingActive == 0 {
+ stats.FilesAdded++
+ } else {
+ stats.FilesUpdated++
+ }
}
// Insert chunks
From 964166d38b1d486f5af146048040cab19ec9d4b5 Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Fri, 1 May 2026 23:38:26 -0600
Subject: [PATCH 02/16] test(indexer): add regression test for re-indexing a
restored file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Covers the full delete-then-restore cycle: index → remove from disk →
re-index (FilesRemoved=1) → restore with new content → re-index
(FilesAdded=1, active=1, chunks populated).
---
internal/indexer/indexer_test.go | 62 ++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/internal/indexer/indexer_test.go b/internal/indexer/indexer_test.go
index 384fc22..0cab8c2 100644
--- a/internal/indexer/indexer_test.go
+++ b/internal/indexer/indexer_test.go
@@ -103,6 +103,68 @@ func TestIndexer_IncrementalUpdate(t *testing.T) {
}
}
+func TestIndexer_ReindexAfterDeletion(t *testing.T) {
+ database := openTestDB(t)
+ idx := New(database, 256)
+ dir := t.TempDir()
+ path := filepath.Join(dir, "doc.md")
+ col := config.Collection{Name: "test", Path: dir, Extensions: []string{".md"}}
+
+ // Index the file
+ if err := os.WriteFile(path, []byte("# Original\nOriginal content."), 0o640); err != nil {
+ t.Fatal(err)
+ }
+ if _, err := idx.Index(context.Background(), col); err != nil {
+ t.Fatal(err)
+ }
+
+ // Delete the file → deactivates the document
+ if err := os.Remove(path); err != nil {
+ t.Fatal(err)
+ }
+ stats, err := idx.Index(context.Background(), col)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if stats.FilesRemoved != 1 {
+ t.Fatalf("expected 1 removed, got %d", stats.FilesRemoved)
+ }
+
+ // Restore the file with new content
+ if err := os.WriteFile(path, []byte("# Restored\nRestored content."), 0o640); err != nil {
+ t.Fatal(err)
+ }
+ stats, err = idx.Index(context.Background(), col)
+ if err != nil {
+ t.Fatalf("index after restore: %v", err)
+ }
+ if stats.FilesAdded != 1 {
+ t.Errorf("expected 1 added after restore, got %d", stats.FilesAdded)
+ }
+
+ // Verify the document is active with new hash and has chunks
+ var active int
+ var hash string
+ row := database.QueryRowContext(context.Background(),
+ `SELECT active, content_hash FROM documents WHERE collection='test' AND path='doc.md'`)
+ if err := row.Scan(&active, &hash); err != nil {
+ t.Fatalf("querying restored document: %v", err)
+ }
+ if active != 1 {
+ t.Errorf("expected active=1, got %d", active)
+ }
+
+ var chunkCount int
+ cRow := database.QueryRowContext(context.Background(),
+ `SELECT COUNT(*) FROM chunks c JOIN documents d ON d.id=c.doc_id WHERE d.collection='test' AND d.path='doc.md' AND d.active=1`)
+ if err := cRow.Scan(&chunkCount); err != nil {
+ t.Fatalf("querying chunks: %v", err)
+ }
+ if chunkCount == 0 {
+ t.Error("expected at least one chunk after restore")
+ }
+}
+
func TestIndexer_DeactivatesMissingFiles(t *testing.T) {
database := openTestDB(t)
idx := New(database, 256)
From 07a05e87e9940544e357dffea23efe8a35a7e616 Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Fri, 1 May 2026 23:40:35 -0600
Subject: [PATCH 03/16] fix(db): add ON DELETE CASCADE to chunk_vectors and
embeddings
chunk_vectors and embeddings referenced chunks(id) with NO ACTION,
causing FK violations (and a silent rollback) whenever a changed document
was reindexed while embeddings existed. SQLite does not support ALTER TABLE
to change FK actions, so this migration rebuilds both tables with the
correct ON DELETE CASCADE constraint.
---
.../db/migrations/003_cascade_chunk_refs.sql | 30 +++++++++++++++++++
1 file changed, 30 insertions(+)
create mode 100644 internal/db/migrations/003_cascade_chunk_refs.sql
diff --git a/internal/db/migrations/003_cascade_chunk_refs.sql b/internal/db/migrations/003_cascade_chunk_refs.sql
new file mode 100644
index 0000000..1741959
--- /dev/null
+++ b/internal/db/migrations/003_cascade_chunk_refs.sql
@@ -0,0 +1,30 @@
+-- Add ON DELETE CASCADE to chunk_vectors and embeddings so reindexing a
+-- changed document (which deletes its chunks) does not fail or orphan rows.
+-- SQLite requires a table rebuild to change foreign key actions.
+
+PRAGMA foreign_keys=OFF;
+
+CREATE TABLE chunk_vectors_new (
+ chunk_id INTEGER PRIMARY KEY REFERENCES chunks(id) ON DELETE CASCADE,
+ vector BLOB NOT NULL
+);
+INSERT INTO chunk_vectors_new(chunk_id, vector)
+ SELECT chunk_id, vector FROM chunk_vectors;
+DROP TABLE chunk_vectors;
+ALTER TABLE chunk_vectors_new RENAME TO chunk_vectors;
+
+CREATE TABLE embeddings_new (
+ chunk_id INTEGER PRIMARY KEY REFERENCES chunks(id) ON DELETE CASCADE,
+ provider TEXT NOT NULL,
+ model TEXT NOT NULL,
+ dimension INTEGER NOT NULL,
+ embedded_at TEXT NOT NULL DEFAULT (datetime('now'))
+);
+INSERT INTO embeddings_new(chunk_id, provider, model, dimension, embedded_at)
+ SELECT chunk_id, provider, model, dimension, embedded_at FROM embeddings;
+DROP TABLE embeddings;
+ALTER TABLE embeddings_new RENAME TO embeddings;
+
+PRAGMA foreign_keys=ON;
+
+INSERT OR IGNORE INTO schema_version(version) VALUES (3);
From 3b6a63dd079a4e75588b3a839ae26d01679f8f66 Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Fri, 1 May 2026 23:40:40 -0600
Subject: [PATCH 04/16] test(indexer): regression test for reindex with
embeddings present
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
TestIndexer_ReindexWithEmbeddings indexes a file, inserts chunk_vectors
and embeddings rows for a real chunk, modifies the file, reindexes, and
asserts FilesUpdated==1 with zero orphan rows — the failure mode this
change prevents.
---
internal/indexer/indexer_test.go | 65 ++++++++++++++++++++++++++++++++
1 file changed, 65 insertions(+)
diff --git a/internal/indexer/indexer_test.go b/internal/indexer/indexer_test.go
index 0cab8c2..8879687 100644
--- a/internal/indexer/indexer_test.go
+++ b/internal/indexer/indexer_test.go
@@ -103,6 +103,71 @@ func TestIndexer_IncrementalUpdate(t *testing.T) {
}
}
+func TestIndexer_ReindexWithEmbeddings(t *testing.T) {
+ database := openTestDB(t)
+ idx := New(database, 256)
+ dir := t.TempDir()
+ path := filepath.Join(dir, "doc.md")
+ col := config.Collection{Name: "test", Path: dir, Extensions: []string{".md"}}
+
+ if err := os.WriteFile(path, []byte("# Original\nOriginal content."), 0o640); err != nil {
+ t.Fatal(err)
+ }
+ if _, err := idx.Index(context.Background(), col); err != nil {
+ t.Fatal(err)
+ }
+
+ // Find a chunk for this document and insert a fake embedding.
+ var chunkID int64
+ row := database.QueryRowContext(context.Background(),
+ `SELECT c.id FROM chunks c JOIN documents d ON d.id=c.doc_id
+ WHERE d.collection='test' AND d.path='doc.md' LIMIT 1`)
+ if err := row.Scan(&chunkID); err != nil {
+ t.Fatalf("finding chunk: %v", err)
+ }
+ if err := database.InsertEmbedding(context.Background(), chunkID, []float32{0.1, 0.2, 0.3, 0.4}); err != nil {
+ t.Fatalf("inserting chunk_vector: %v", err)
+ }
+ if _, err := database.ExecContext(context.Background(),
+ `INSERT INTO embeddings(chunk_id, provider, model, dimension) VALUES (?, 'test', 'test-model', 4)`,
+ chunkID); err != nil {
+ t.Fatalf("inserting embeddings row: %v", err)
+ }
+
+ // Modify the file so its hash changes.
+ if err := os.WriteFile(path, []byte("# Updated\nUpdated content."), 0o640); err != nil {
+ t.Fatal(err)
+ }
+
+ // Reindex must succeed and report the file as updated.
+ stats, err := idx.Index(context.Background(), col)
+ if err != nil {
+ t.Fatalf("reindex failed: %v", err)
+ }
+ if stats.FilesUpdated != 1 {
+ t.Errorf("expected 1 updated, got %d", stats.FilesUpdated)
+ }
+
+ // No orphaned rows should remain in chunk_vectors or embeddings.
+ var orphanVectors int
+ if err := database.QueryRowContext(context.Background(),
+ `SELECT COUNT(*) FROM chunk_vectors WHERE chunk_id NOT IN (SELECT id FROM chunks)`).Scan(&orphanVectors); err != nil {
+ t.Fatalf("querying orphan chunk_vectors: %v", err)
+ }
+ if orphanVectors != 0 {
+ t.Errorf("expected 0 orphan chunk_vectors rows, got %d", orphanVectors)
+ }
+
+ var orphanEmbeddings int
+ if err := database.QueryRowContext(context.Background(),
+ `SELECT COUNT(*) FROM embeddings WHERE chunk_id NOT IN (SELECT id FROM chunks)`).Scan(&orphanEmbeddings); err != nil {
+ t.Fatalf("querying orphan embeddings: %v", err)
+ }
+ if orphanEmbeddings != 0 {
+ t.Errorf("expected 0 orphan embeddings rows, got %d", orphanEmbeddings)
+ }
+}
+
func TestIndexer_ReindexAfterDeletion(t *testing.T) {
database := openTestDB(t)
idx := New(database, 256)
From 3c63343abdb6b76fc35d4b477159d70e2385bdaa Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Fri, 1 May 2026 23:42:03 -0600
Subject: [PATCH 05/16] fix(db): make migration 003 idempotent with DROP IF
EXISTS guards
If the migration failed mid-run (after creating embeddings_new but before
completing), schema_version stayed at 2 and the next run would fail with
"table embeddings_new already exists". Adding DROP TABLE IF EXISTS before
each CREATE TABLE makes the migration safe to retry.
---
internal/db/migrations/003_cascade_chunk_refs.sql | 2 ++
1 file changed, 2 insertions(+)
diff --git a/internal/db/migrations/003_cascade_chunk_refs.sql b/internal/db/migrations/003_cascade_chunk_refs.sql
index 1741959..d33d102 100644
--- a/internal/db/migrations/003_cascade_chunk_refs.sql
+++ b/internal/db/migrations/003_cascade_chunk_refs.sql
@@ -4,6 +4,7 @@
PRAGMA foreign_keys=OFF;
+DROP TABLE IF EXISTS chunk_vectors_new;
CREATE TABLE chunk_vectors_new (
chunk_id INTEGER PRIMARY KEY REFERENCES chunks(id) ON DELETE CASCADE,
vector BLOB NOT NULL
@@ -13,6 +14,7 @@ INSERT INTO chunk_vectors_new(chunk_id, vector)
DROP TABLE chunk_vectors;
ALTER TABLE chunk_vectors_new RENAME TO chunk_vectors;
+DROP TABLE IF EXISTS embeddings_new;
CREATE TABLE embeddings_new (
chunk_id INTEGER PRIMARY KEY REFERENCES chunks(id) ON DELETE CASCADE,
provider TEXT NOT NULL,
From 6021475092ccbfd8c79f2c5fcdad090e79c7e879 Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Fri, 1 May 2026 23:43:36 -0600
Subject: [PATCH 06/16] fix(db): handle missing dimension column in legacy
embeddings table
Migration 002 used CREATE TABLE IF NOT EXISTS, which was a no-op on
databases where embeddings already existed without the dimension column.
The INSERT in migration 003 then failed with "no such column: dimension".
Fix: give dimension a DEFAULT 0 and omit it from the INSERT select list
so the migration works regardless of the source table's schema.
---
internal/db/migrations/003_cascade_chunk_refs.sql | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/internal/db/migrations/003_cascade_chunk_refs.sql b/internal/db/migrations/003_cascade_chunk_refs.sql
index d33d102..11496ab 100644
--- a/internal/db/migrations/003_cascade_chunk_refs.sql
+++ b/internal/db/migrations/003_cascade_chunk_refs.sql
@@ -19,11 +19,13 @@ CREATE TABLE embeddings_new (
chunk_id INTEGER PRIMARY KEY REFERENCES chunks(id) ON DELETE CASCADE,
provider TEXT NOT NULL,
model TEXT NOT NULL,
- dimension INTEGER NOT NULL,
+ dimension INTEGER NOT NULL DEFAULT 0,
embedded_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-INSERT INTO embeddings_new(chunk_id, provider, model, dimension, embedded_at)
- SELECT chunk_id, provider, model, dimension, embedded_at FROM embeddings;
+-- Omit dimension: older DBs may not have it (002 used CREATE TABLE IF NOT EXISTS,
+-- which was a no-op when the table already existed without that column).
+INSERT INTO embeddings_new(chunk_id, provider, model, embedded_at)
+ SELECT chunk_id, provider, model, embedded_at FROM embeddings;
DROP TABLE embeddings;
ALTER TABLE embeddings_new RENAME TO embeddings;
From 36a17ca643387773025713c6f501884a4ec9fcc0 Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Fri, 1 May 2026 23:47:56 -0600
Subject: [PATCH 07/16] feat(indexer): skip all dot-directories during walk
Dot-prefixed directories (e.g. .venv, .cache, .mypy_cache) are
almost never user content, so skip them unconditionally rather than
relying on an enumerated denylist.
---
internal/indexer/indexer.go | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/internal/indexer/indexer.go b/internal/indexer/indexer.go
index 98085e7..b1db939 100644
--- a/internal/indexer/indexer.go
+++ b/internal/indexer/indexer.go
@@ -95,7 +95,8 @@ func (idx *Indexer) Index(ctx context.Context, col config.Collection) (Stats, er
return err
}
if d.IsDir() {
- if defaultIgnoreDirs[d.Name()] || ignoreSet[d.Name()] {
+ name := d.Name()
+ if defaultIgnoreDirs[name] || ignoreSet[name] || (strings.HasPrefix(name, ".") && name != ".") {
return filepath.SkipDir
}
return nil
From 974548d33afd34d952a59703e6632a31fadbf721 Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Fri, 1 May 2026 23:51:49 -0600
Subject: [PATCH 08/16] docs(skills): add command selection guidance to qi-cli
skill
Adds a section advising when to use qi search/query vs qi ask, and
marks qi ask as to be used sparingly since it consumes LLM tokens.
---
skills/qi-cli/SKILL.md | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/skills/qi-cli/SKILL.md b/skills/qi-cli/SKILL.md
index b084669..38f1c4f 100644
--- a/skills/qi-cli/SKILL.md
+++ b/skills/qi-cli/SKILL.md
@@ -14,11 +14,19 @@ qi index # Index the current directory, or a na
qi doctor # Verify setup
qi search "your query" # BM25 keyword search (no provider needed)
qi query "your semantic question" # Hybrid search (needs embedding provider)
-qi ask "what does X do?" # RAG Q&A (needs generation provider)
+qi ask "what does X do?" # RAG Q&A; use sparingly (needs generation provider)
```
---
+## Command selection guidance
+
+Prefer `qi index` when the task is about adding, refreshing, or organizing source material.
+Prefer `qi search` or `qi query` when the task is about finding relevant documents, passages, or citations.
+Use `qi ask` sparingly, only when the user specifically needs a synthesized answer from an LLM rather than retrieved source results.
+
+---
+
## Commands
### `qi init`
@@ -67,6 +75,7 @@ qi query "question" --explain # show BM25/vector/RRF score breakdo
### `qi ask `
RAG Q&A: searches the knowledge base, sends relevant chunks to an LLM, returns an answer with citations.
+Use this sparingly; prefer `qi query` for normal exploration, evidence gathering, and source lookup.
```bash
qi ask "What authentication methods are supported?"
@@ -240,9 +249,10 @@ qi index notes
qi query "how does X work" --explain
```
-**RAG Q&A:**
+**RAG Q&A (use sparingly):**
```bash
# also add a generation provider to config
+# prefer qi query unless you need a synthesized answer
qi ask "Summarize the key decisions in my notes"
```
From 16976bb4121200ff541aa74070dff54d9c3d35dd Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Fri, 1 May 2026 23:51:52 -0600
Subject: [PATCH 09/16] chore(plugin): bump plugin version to 0.4.0
---
.claude-plugin/marketplace.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index 71e25a4..0ff8c54 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -8,7 +8,7 @@
"name": "qi",
"source": "./",
"description": "Local knowledge search CLI — index documents and search them using BM25 full-text search, vector embeddings, and LLM-powered Q&A, all running locally with no external dependencies.",
- "version": "0.3.0",
+ "version": "0.4.0",
"author": {
"name": "itsmostafa"
},
From 22060750d63def046cf5c3a2d93933e6cb60bb6b Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Fri, 1 May 2026 23:54:43 -0600
Subject: [PATCH 10/16] docs(readme): remove `qi version` from the commands table
---
README.md | 1 -
1 file changed, 1 deletion(-)
diff --git a/README.md b/README.md
index 722fbd7..9bdb0d9 100644
--- a/README.md
+++ b/README.md
@@ -105,7 +105,6 @@ qi doctor
| `qi delete ` | Delete a named collection and all its indexed data |
| `qi stats` | Show index statistics |
| `qi doctor` | Health check |
-| `qi version` | Print version |
## Search Modes
From 70f0d9f2892a12f8529722e2dce0845746c4bd51 Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Sat, 2 May 2026 00:01:07 -0600
Subject: [PATCH 11/16] docs(readme): rewrite tagline for clarity and brevity
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 9bdb0d9..d30a957 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
-A local-first knowledge search CLI for macOS and Linux. Index and search anything — codebases, documentation, research papers, notes, wikis, datasets, logs, contracts, books — using BM25 full-text search, vector embeddings, and LLM-powered Q&A. Choose your own models via Ollama, LM Studio, llama.cpp, MLX or using OpenAI's cloud models.
+qi is an ultra-fast knowledge search CLI for your files on your local machine. No dependencies, no runtime, just a single executable that indexes code, docs, notes, papers, logs, and other text into SQLite, then gives you BM25 search, optional vector search, and grounded LLM Q&A with citations. Use it offline with Ollama, LM Studio, llama.cpp, or MLX, or connect OpenAI for cloud models.
## Features
From 83217e3478d889d7e2e6f8bebb81234d3b07f44c Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Sat, 2 May 2026 00:03:13 -0600
Subject: [PATCH 12/16] docs(readme): add AI agent token-saving use case blurb
---
README.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/README.md b/README.md
index d30a957..0c9c99c 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,8 @@
qi is an ultra-fast knowledge search CLI for your files on your local machine. No dependencies, no runtime, just a single executable that indexes code, docs, notes, papers, logs, and other text into SQLite, then gives you BM25 search, optional vector search, and grounded LLM Q&A with citations. Use it offline with Ollama, LM Studio, llama.cpp, or MLX, or connect OpenAI for cloud models.
+Save tokens by delagating some of your AI Agent's work to qi.
+
## Features
- **Blazing-fast full-text search** — BM25 via SQLite FTS5, no external search engine required
From 655480daf0436893476ae02e150c8a7aa7bb541b Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Sat, 2 May 2026 21:53:38 -0600
Subject: [PATCH 13/16] docs(readme): fix typo delegating
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 0c9c99c..ac5a49a 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
qi is an ultra-fast knowledge search CLI for your files on your local machine. No dependencies, no runtime, just a single executable that indexes code, docs, notes, papers, logs, and other text into SQLite, then gives you BM25 search, optional vector search, and grounded LLM Q&A with citations. Use it offline with Ollama, LM Studio, llama.cpp, or MLX, or connect OpenAI for cloud models.
-Save tokens by delagating some of your AI Agent's work to qi.
+Save tokens by delegating some of your AI Agent's work to qi.
## Features
From 1cc260747fea131a671cc52ab12b332102168956 Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Sat, 2 May 2026 21:55:50 -0600
Subject: [PATCH 14/16] fix(db): preserve embedding dimension during migration
003 table rebuild
The INSERT into embeddings_new omitted dimension, silently writing 0 for
all migrated rows even when the source table had real values. Derive
dimension from length(cv.vector)/4 via a LEFT JOIN on chunk_vectors so
both old schemas (no dimension column) and current ones are handled
correctly without touching a potentially-absent source column.
---
internal/db/migrations/003_cascade_chunk_refs.sql | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/internal/db/migrations/003_cascade_chunk_refs.sql b/internal/db/migrations/003_cascade_chunk_refs.sql
index 11496ab..c0e4bd8 100644
--- a/internal/db/migrations/003_cascade_chunk_refs.sql
+++ b/internal/db/migrations/003_cascade_chunk_refs.sql
@@ -22,10 +22,14 @@ CREATE TABLE embeddings_new (
dimension INTEGER NOT NULL DEFAULT 0,
embedded_at TEXT NOT NULL DEFAULT (datetime('now'))
);
--- Omit dimension: older DBs may not have it (002 used CREATE TABLE IF NOT EXISTS,
--- which was a no-op when the table already existed without that column).
-INSERT INTO embeddings_new(chunk_id, provider, model, embedded_at)
- SELECT chunk_id, provider, model, embedded_at FROM embeddings;
+-- Derive dimension from vector length (float32 = 4 bytes each) so that both
+-- old schemas (no dimension column) and new ones are handled correctly.
+INSERT INTO embeddings_new(chunk_id, provider, model, dimension, embedded_at)
+ SELECT e.chunk_id, e.provider, e.model,
+ COALESCE(length(cv.vector)/4, 0),
+ e.embedded_at
+ FROM embeddings e
+ LEFT JOIN chunk_vectors cv ON cv.chunk_id = e.chunk_id;
DROP TABLE embeddings;
ALTER TABLE embeddings_new RENAME TO embeddings;
From d58e3d6559bc2461b5b098add1b88ca034e5c31b Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Sat, 2 May 2026 21:58:14 -0600
Subject: [PATCH 15/16] fix(indexer): return error on non-ErrNoRows scan
failure in indexFile
Swallowing the Scan error left docID=0 on any transient DB failure,
causing a fallthrough to INSERT that would hit the UNIQUE(collection,path)
constraint and silently leave the file unindexed.
---
internal/indexer/indexer.go | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/internal/indexer/indexer.go b/internal/indexer/indexer.go
index b1db939..848b26d 100644
--- a/internal/indexer/indexer.go
+++ b/internal/indexer/indexer.go
@@ -5,6 +5,7 @@ import (
"crypto/sha256"
"database/sql"
"encoding/hex"
+ "errors"
"fmt"
"io/fs"
"log/slog"
@@ -153,7 +154,9 @@ func (idx *Indexer) indexFile(ctx context.Context, col config.Collection, relPat
row := idx.db.QueryRowContext(ctx,
`SELECT id, content_hash, active FROM documents WHERE collection=? AND path=?`,
col.Name, relPath)
- _ = row.Scan(&docID, &existingHash, &existingActive)
+ if err := row.Scan(&docID, &existingHash, &existingActive); err != nil && !errors.Is(err, sql.ErrNoRows) {
+ return fmt.Errorf("looking up existing document: %w", err)
+ }
if existingActive == 1 && existingHash == hash {
return nil // unchanged
From 40c23765a076d44de4fca87d601315d6a5354456 Mon Sep 17 00:00:00 2001
From: itsmostafa
Date: Sat, 2 May 2026 22:03:35 -0600
Subject: [PATCH 16/16] fix(indexer): preserve embeddings when reactivating
unchanged documents
When a deactivated document was restored with byte-identical content,
the indexer still ran DELETE FROM chunks, which cascades into
chunk_vectors and embeddings (added by migration 003), forcing
unnecessary re-embedding work.
Add a fast path for `docID != 0 && existingActive == 0 && existingHash == hash`:
reactivate the document row and return without touching chunks or embeddings.
Adds TestIndexer_ReactivateSameContent to guard the preserved-embedding invariant.
---
internal/indexer/indexer.go | 12 ++++
internal/indexer/indexer_test.go | 106 +++++++++++++++++++++++++++++++
2 files changed, 118 insertions(+)
diff --git a/internal/indexer/indexer.go b/internal/indexer/indexer.go
index 848b26d..aa28e6d 100644
--- a/internal/indexer/indexer.go
+++ b/internal/indexer/indexer.go
@@ -162,6 +162,18 @@ func (idx *Indexer) indexFile(ctx context.Context, col config.Collection, relPat
return nil // unchanged
}
+ // Fast-path: previously deactivated document restored with byte-identical content.
+ // Reactivate the row without touching chunks or embeddings — deleting chunks would
+ // cascade into chunk_vectors/embeddings (migration 003) and force pointless re-embedding.
+ if docID != 0 && existingActive == 0 && existingHash == hash {
+ if _, err := idx.db.ExecContext(ctx,
+ `UPDATE documents SET active=1, updated_at=datetime('now') WHERE id=?`, docID); err != nil {
+ return fmt.Errorf("reactivating document: %w", err)
+ }
+ stats.FilesAdded++
+ return nil
+ }
+
// Upsert content
if _, err := idx.db.ExecContext(ctx,
`INSERT OR IGNORE INTO content(hash, body) VALUES (?, ?)`,
diff --git a/internal/indexer/indexer_test.go b/internal/indexer/indexer_test.go
index 8879687..22f3def 100644
--- a/internal/indexer/indexer_test.go
+++ b/internal/indexer/indexer_test.go
@@ -230,6 +230,112 @@ func TestIndexer_ReindexAfterDeletion(t *testing.T) {
}
}
+func TestIndexer_ReactivateSameContent(t *testing.T) {
+ database := openTestDB(t)
+ idx := New(database, 256)
+ dir := t.TempDir()
+ path := filepath.Join(dir, "doc.md")
+ col := config.Collection{Name: "test", Path: dir, Extensions: []string{".md"}}
+
+ body := []byte("# Original\nOriginal content.")
+ if err := os.WriteFile(path, body, 0o640); err != nil {
+ t.Fatal(err)
+ }
+ if _, err := idx.Index(context.Background(), col); err != nil {
+ t.Fatal(err)
+ }
+
+ // Capture chunk IDs and seed an embedding to detect spurious deletion.
+ rows, err := database.QueryContext(context.Background(),
+ `SELECT c.id FROM chunks c JOIN documents d ON d.id=c.doc_id
+ WHERE d.collection='test' AND d.path='doc.md' ORDER BY c.id`)
+ if err != nil {
+ t.Fatalf("listing chunks: %v", err)
+ }
+ var originalChunkIDs []int64
+ for rows.Next() {
+ var id int64
+ if err := rows.Scan(&id); err != nil {
+ t.Fatal(err)
+ }
+ originalChunkIDs = append(originalChunkIDs, id)
+ }
+ rows.Close()
+ if len(originalChunkIDs) == 0 {
+ t.Fatal("expected at least one chunk after initial index")
+ }
+
+ seedID := originalChunkIDs[0]
+ if err := database.InsertEmbedding(context.Background(), seedID, []float32{0.1, 0.2, 0.3, 0.4}); err != nil {
+ t.Fatalf("inserting chunk_vector: %v", err)
+ }
+ if _, err := database.ExecContext(context.Background(),
+ `INSERT INTO embeddings(chunk_id, provider, model, dimension) VALUES (?, 'test', 'test-model', 4)`,
+ seedID); err != nil {
+ t.Fatalf("inserting embeddings row: %v", err)
+ }
+
+ // Delete the file → deactivates the document.
+ if err := os.Remove(path); err != nil {
+ t.Fatal(err)
+ }
+ if _, err := idx.Index(context.Background(), col); err != nil {
+ t.Fatal(err)
+ }
+
+ // Restore byte-identical content.
+ if err := os.WriteFile(path, body, 0o640); err != nil {
+ t.Fatal(err)
+ }
+ stats, err := idx.Index(context.Background(), col)
+ if err != nil {
+ t.Fatalf("index after restore: %v", err)
+ }
+ if stats.FilesAdded != 1 {
+ t.Errorf("expected 1 added, got %d", stats.FilesAdded)
+ }
+
+ // Document must be active again.
+ var active int
+ if err := database.QueryRowContext(context.Background(),
+ `SELECT active FROM documents WHERE collection='test' AND path='doc.md'`).
+ Scan(&active); err != nil {
+ t.Fatalf("querying restored document: %v", err)
+ }
+ if active != 1 {
+ t.Errorf("expected active=1, got %d", active)
+ }
+
+ // Chunk ID must be preserved — proves DELETE FROM chunks did not run.
+ var preservedCount int
+ if err := database.QueryRowContext(context.Background(),
+ `SELECT COUNT(*) FROM chunks WHERE id = ?`, seedID).Scan(&preservedCount); err != nil {
+ t.Fatalf("querying preserved chunk: %v", err)
+ }
+ if preservedCount != 1 {
+ t.Fatalf("expected seed chunk %d to survive restore, got count %d", seedID, preservedCount)
+ }
+
+ // Embedding and vector for the seed chunk must still exist.
+ var embCount int
+ if err := database.QueryRowContext(context.Background(),
+ `SELECT COUNT(*) FROM embeddings WHERE chunk_id = ?`, seedID).Scan(&embCount); err != nil {
+ t.Fatalf("querying embedding: %v", err)
+ }
+ if embCount != 1 {
+ t.Errorf("expected embedding for chunk %d to survive restore, got %d", seedID, embCount)
+ }
+
+ var vecCount int
+ if err := database.QueryRowContext(context.Background(),
+ `SELECT COUNT(*) FROM chunk_vectors WHERE chunk_id = ?`, seedID).Scan(&vecCount); err != nil {
+ t.Fatalf("querying chunk_vector: %v", err)
+ }
+ if vecCount != 1 {
+ t.Errorf("expected chunk_vector for chunk %d to survive restore, got %d", seedID, vecCount)
+ }
+}
+
func TestIndexer_DeactivatesMissingFiles(t *testing.T) {
database := openTestDB(t)
idx := New(database, 256)