diff --git a/.gitignore b/.gitignore index a05dac13..9147f650 100644 --- a/.gitignore +++ b/.gitignore @@ -84,6 +84,3 @@ wheels/ .venv/ venv/ ENV/ - -# Test workspace -workspace* \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 30641940..86b89fb9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,11 +10,13 @@ members = [ "vectorless-core/vectorless-metrics", "vectorless-core/vectorless-llm", "vectorless-core/vectorless-storage", - "vectorless-core/vectorless-query", + # Strategy layer moved to Python — crates kept but not compiled: + # "vectorless-core/vectorless-query", + # "vectorless-core/vectorless-agent", + # "vectorless-core/vectorless-retrieval", "vectorless-core/vectorless-index", - "vectorless-core/vectorless-agent", - "vectorless-core/vectorless-retrieval", "vectorless-core/vectorless-rerank", + "vectorless-core/vectorless-primitives", "vectorless-core/vectorless-engine", "vectorless-core/vectorless-py", ] diff --git a/README.md b/README.md index b42dc245..ab01bdbd 100644 --- a/README.md +++ b/README.md @@ -1,126 +1,43 @@ -
- -Vectorless - -

Document Understanding Engine for AI

-

Reason, don't vector · Structure, not chunks · Think, then answer

+

Vectorless

[![PyPI](https://img.shields.io/pypi/v/vectorless.svg)](https://pypi.org/project/vectorless/) [![PyPI Downloads](https://static.pepy.tech/badge/vectorless/month)](https://pepy.tech/projects/vectorless) -[![Crates.io](https://img.shields.io/crates/v/vectorless.svg)](https://crates.io/crates/vectorless) -[![Crates.io Downloads](https://img.shields.io/crates/d/vectorless.svg)](https://crates.io/crates/vectorless) -[![Docs](https://docs.rs/vectorless/badge.svg)](https://docs.rs/vectorless) -[![License](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](LICENSE) - -
- -**Vectorless** is a document understanding engine for AI. It compiles documents into structured trees of meaning, then dispatches multiple agents to reason through headings, sections, and paragraphs — evaluating how each part relates to the whole. The problem it solves is not "where to look", but "what does this mean in context". Every answer is a reasoning act, not a retrieval result. - -Light up a star and shine with us! ⭐ - -## Three Rules -- **Reason, don't vector.** Understanding is reasoning, not similarity. -- **Model fails, we fail.** No heuristic fallbacks, no silent degradation. -- **No thought, no answer.** Only reasoned output counts as an answer. -## How It Works +

Knowing by reasoning, not vectors.

+

Deep and reliable. Vectorless plays nicely with your documents. Ask questions in plain language; get answers by reasoning with Vectorless.

-### Four-Artifact Index Architecture +## Installation -When a document is indexed, the compile pipeline builds four artifacts: +Install using `pip install -U vectorless`. For more details, see the [Installation](https://vectorless.dev/docs/installation) section in the documentation. -``` -Content Layer Navigation Layer Reasoning Index Document Card -DocumentTree NavigationIndex ReasoningIndex DocCard -(TreeNode) (NavEntry, ChildRoute) (topic_paths, hot_nodes) (title, overview, - │ │ │ question hints) - │ │ │ │ - Agent reads Agent reads every Agent's targeted Orchestrator reads - only on cat decision round search tool (grep) for multi-doc routing -``` - -- **Content Layer** — The raw document tree. The agent only accesses this when reading specific paragraphs (`cat`). -- **Navigation Layer** — Each non-leaf node stores an overview, question hints, and child routes (title + description). The agent reads this every round to decide where to go next. -- **Reasoning Index** — Keyword-topic mappings with weights. Provides the agent's `grep` tool with structured keyword data for targeted search within a document. -- **DocCard** — A compact document-level summary. The Orchestrator reads DocCards to decide which documents to navigate in multi-document queries, without loading full documents. - -This separation means the agent makes routing decisions from lightweight metadata, not by scanning full content. - -### Agent-Based Understanding - -``` -Engine.query("What drove the revenue decline?") - │ - ├─ Query Understanding ── intent, concepts, strategy (LLM) - │ - ├─ Orchestrator ── analyzes query, dispatches Workers - │ │ - │ ├─ Worker 1 ── ls → cd "Financials" → ls → cd "Revenue" → cat - │ └─ Worker 2 ── ls → cd "Risk Factors" → grep "decline" → cat - │ │ - │ └─ evaluate ── insufficient? 
→ replan → dispatch new paths → loop - │ - └─ Synthesis ── dedup, evidence scoring, reasoned answer with source chain -``` - -Worker navigation commands: - -| Command | Action | Reads | -|---------|--------|-------| -| `ls` | List child sections | Navigation Layer (ChildRoute) | -| `cd` | Enter a child section | Navigation Layer | -| `cat` | Read content at current node | Content Layer (DocumentTree) | -| `grep` | Search by keyword | Reasoning Index (topic_paths) | - -The Orchestrator evaluates Worker results after each round. If evidence is insufficient, it **replans** — adjusting strategy, dispatching new paths, or deepening exploration. This continues until enough evidence is collected. - -## Quick Start - -```bash -pip install vectorless -``` +## A Simple Example ```python import asyncio -from vectorless import Engine, IndexContext, QueryContext +from vectorless import Engine async def main(): engine = Engine(api_key="sk-...", model="gpt-4o", endpoint="https://api.openai.com/v1") - # Index a document - result = await engine.index(IndexContext.from_path("./report.pdf")) + # Compile a document + result = await engine.compile(path="./report.pdf") doc_id = result.doc_id - # Query - result = await engine.query( - QueryContext("What is the total revenue?").with_doc_ids([doc_id]) - ) - print(result.single().content) + # Ask a question + response = await engine.ask("What is the total revenue?", doc_ids=[doc_id]) + print(response.single().content) asyncio.run(main()) ``` -## Resources +## Help -- [Documentation](https://vectorless.dev) — Guides, architecture, API reference -- [Rust API Docs](https://docs.rs/vectorless) — Auto-generated crate documentation -- [PyPI](https://pypi.org/project/vectorless/) — Python package -- [Crates.io](https://crates.io/crates/vectorless) — Rust crate -- [Examples](examples/) — Complete usage patterns for Python and Rust +See [documentation](https://vectorless.dev/docs/getting-started) for more details. 
-## Contributing -Contributions welcome! If you find this useful, please ⭐ the repo — it helps others discover it. - -## Star History +## Contributing - - - - - Star History Chart - - +Contributions welcome! See [Contributing](CONTRIBUTING.md) for setup and guidelines. ## License diff --git a/docs/docs/api-reference.mdx b/docs/docs/api-reference.mdx index 5261afbf..98b0c30d 100644 --- a/docs/docs/api-reference.mdx +++ b/docs/docs/api-reference.mdx @@ -1,7 +1,7 @@ --- sidebar_position: 9 title: API Reference -description: Complete API reference for Vectorless Rust crate and Python SDK. +description: Complete API reference for the Vectorless Python SDK. --- # API Reference @@ -10,8 +10,7 @@ description: Complete API reference for Vectorless Rust crate and Python SDK. In the meantime, you can refer to the following resources: -- **Rust crate docs**: [docs.rs/vectorless](https://docs.rs/vectorless) — auto-generated documentation from source code - **Python SDK docs**: Available via `help(vectorless)` in an interactive Python session - **Source code**: [github.com/vectorlessflow/vectorless](https://github.com/vectorlessflow/vectorless) -For usage examples, see [Quick Query](/docs/examples/quick-query), [Multi-Document](/docs/examples/multi-document), and [Batch Indexing](/docs/examples/batch-indexing). +For usage examples, see [Quick Query](/docs/examples/quick-query), [Multi-Document](/docs/examples/multi-document), and [Batch Compiling](/docs/examples/batch-indexing). diff --git a/docs/docs/architecture.mdx b/docs/docs/architecture.mdx index c5edfe1a..69867710 100644 --- a/docs/docs/architecture.mdx +++ b/docs/docs/architecture.mdx @@ -67,7 +67,7 @@ The retrieval pipeline is a supervisor loop driven entirely by LLM reasoning. 
Ev ### Flow ```text -Engine.query() +Engine.ask() → Dispatcher → Query Understanding (LLM) → QueryPlan (intent, concepts, strategy) → Orchestrator (always — single or multi-doc) diff --git a/docs/docs/examples/batch-indexing.mdx b/docs/docs/examples/batch-indexing.mdx index b9f77adc..43886d5e 100644 --- a/docs/docs/examples/batch-indexing.mdx +++ b/docs/docs/examples/batch-indexing.mdx @@ -2,15 +2,13 @@ sidebar_position: 3 --- -# Batch Indexing +# Batch Compiling -Index multiple documents efficiently with progress tracking and error handling. - -## Python +Compile multiple documents efficiently with progress tracking and error handling. ```python import asyncio -from vectorless import Engine, IndexContext, IndexOptions +from vectorless import Engine async def main(): engine = Engine( @@ -18,12 +16,10 @@ async def main(): model="gpt-4o", ) - # Index a directory of documents - result = await engine.index( - IndexContext.from_dir("./documents/") - ) + # Compile a directory of documents + result = await engine.compile(directory="./documents/") - print(f"Indexed {len(result.items)} documents") + print(f"Compiled {len(result.items)} documents") print(f"Failures: {len(result.failed)}") for item in result.items: @@ -37,50 +33,19 @@ async def main(): for fail in result.failed: print(f" ✗ {fail.source}: {fail.error}") - # List all indexed documents - docs = await engine.list() - print(f"\nTotal indexed: {len(docs)} documents") + # List all compiled documents + docs = await engine.list_documents() + print(f"\nTotal compiled: {len(docs)} documents") asyncio.run(main()) ``` -## Rust - -```rust -use vectorless::{Engine, EngineBuilder, IndexContext}; - -#[tokio::main] -async fn main() -> vectorless::Result<()> { - let engine = EngineBuilder::new() - .with_key("sk-...") - .with_model("gpt-4o") - .build() - .await?; - - // Index a directory - let result = engine.index(IndexContext::from_dir("./documents/")).await?; - - println!("Indexed {} documents", result.items.len()); - 
println!("Failures: {}", result.failed.len()); - - for item in &result.items { - println!(" ✓ {} ({:?}) → {}", item.name, item.format, item.doc_id); - } - - // List all documents - let docs = engine.list().await?; - println!("Total indexed: {} documents", docs.len()); - - Ok(()) -} -``` - ## Error Handling -Each item in the result is either successful or failed. Failures don't prevent other documents from being indexed: +Each item in the result is either successful or failed. Failures don't prevent other documents from being compiled: ```python -result = await engine.index(IndexContext.from_paths(mixed_paths)) +result = await engine.compile(paths=mixed_paths) # Successful items for item in result.items: diff --git a/docs/docs/examples/multi-document.mdx b/docs/docs/examples/multi-document.mdx index 11ddc458..5922c98f 100644 --- a/docs/docs/examples/multi-document.mdx +++ b/docs/docs/examples/multi-document.mdx @@ -4,16 +4,11 @@ sidebar_position: 2 # Multi-Document Retrieval -Query across multiple indexed documents using the cross-document strategy with graph-based score boosting. - -## Python +Query across multiple compiled documents using the cross-document strategy with graph-based score boosting. ```python import asyncio -from vectorless import ( - Engine, IndexContext, QueryContext, - IndexOptions, -) +from vectorless import Engine async def main(): engine = Engine( @@ -21,14 +16,14 @@ async def main(): model="gpt-4o", ) - # Index multiple documents + # Compile multiple documents docs = ["./report-q1.pdf", "./report-q2.pdf", "./report-q3.pdf"] doc_ids = [] for path in docs: - result = await engine.index(IndexContext.from_path(path)) + result = await engine.compile(path=path) doc_ids.append(result.doc_id) - print(f"Indexed: {path} → {result.doc_id}") + print(f"Compiled: {path} → {result.doc_id}") # Check the cross-document graph graph = await engine.get_graph() @@ -40,25 +35,26 @@ async def main(): print(f" {doc_id[:8]}... → {edge.target_doc_id[:8]}... 
({edge.weight:.2f})") # Query across all documents - result = await engine.query( - QueryContext("Compare quarterly revenue trends") - .with_doc_ids(doc_ids) + response = await engine.ask( + "Compare quarterly revenue trends", + doc_ids=doc_ids, ) - for item in result.items: - print(f"\n[{item.doc_id[:8]}...] Score: {item.score:.2f}") + for item in response.items: + print(f"\n[{item.doc_id[:8]}...] Confidence: {item.confidence:.2f}") print(item.content[:300]) # Or query entire workspace - result = await engine.query( - QueryContext("What documents discuss risk factors?") + response = await engine.ask( + "What documents discuss risk factors?", + workspace_scope=True, ) - print(f"\nFound in {len(result.items)} document(s)") + print(f"\nFound in {len(response.items)} document(s)") # Cleanup for doc_id in doc_ids: - await engine.remove(doc_id) + await engine.remove_document(doc_id) asyncio.run(main()) ``` @@ -67,19 +63,8 @@ asyncio.run(main()) ### Document Graph -After indexing, documents are connected in a graph based on shared keywords. The graph enables: +After compiling, documents are connected in a graph based on shared keywords. 
The graph enables: - **Score boosting** — High-confidence results in one document boost neighbor documents - **Relationship discovery** — Automatically find related documents - **Cross-referencing** — Results from connected documents are surfaced together - -### Merge Strategies - -The cross-document strategy supports multiple merge modes: - -| Strategy | Description | -|----------|-------------| -| **TopK** | Return top-K results across all documents | -| **BestPerDocument** | Best result from each document | -| **WeightedByRelevance** | Weight by each document's best score | -| **GraphBoosted** | Use graph connections to boost scores | diff --git a/docs/docs/examples/quick-query.mdx b/docs/docs/examples/quick-query.mdx index 9a39c82c..31ea7040 100644 --- a/docs/docs/examples/quick-query.mdx +++ b/docs/docs/examples/quick-query.mdx @@ -4,13 +4,11 @@ sidebar_position: 1 # Quick Query Example -This example demonstrates the basic index-and-query workflow with both Python and Rust. - -## Python +This example demonstrates the basic compile-and-ask workflow. ```python import asyncio -from vectorless import Engine, IndexContext, QueryContext +from vectorless import Engine async def main(): # 1. Create engine @@ -19,66 +17,25 @@ async def main(): model="gpt-4o", ) - # 2. Index a document - result = await engine.index(IndexContext.from_path("./report.pdf")) + # 2. Compile a document + result = await engine.compile(path="./report.pdf") doc_id = result.doc_id - print(f"Indexed document: {doc_id}") + print(f"Compiled document: {doc_id}") # 3. Simple keyword query - answer = await engine.query( - QueryContext("revenue") - .with_doc_ids([doc_id]) - ) - print(f"Keyword result: {answer.single().content[:200]}") + response = await engine.ask("revenue", doc_ids=[doc_id]) + print(f"Result: {response.single().content[:200]}") # 4. 
Complex reasoning query - answer = await engine.query( - QueryContext("What are the main factors affecting performance?") - .with_doc_ids([doc_id]) + response = await engine.ask( + "What are the main factors affecting performance?", + doc_ids=[doc_id], ) - print(f"Score: {answer.single().score:.2f}") - print(f"Hybrid result: {answer.single().content[:200]}") + print(f"Confidence: {response.single().confidence:.2f}") + print(f"Result: {response.single().content[:200]}") # 5. Cleanup - await engine.remove(doc_id) + await engine.remove_document(doc_id) asyncio.run(main()) ``` - -## Rust - -```rust -use vectorless::{Engine, EngineBuilder, IndexContext, QueryContext}; -use vectorless::StrategyPreference; - -#[tokio::main] -async fn main() -> vectorless::Result<()> { - // 1. Create engine - let engine = EngineBuilder::new() - .with_key("sk-...") - .with_model("gpt-4o") - .build() - .await?; - - // 2. Index a document - let result = engine.index(IndexContext::from_path("./report.pdf")).await?; - let doc_id = result.doc_id().unwrap().to_string(); - println!("Indexed document: {}", doc_id); - - // 3. Query with hybrid strategy - let answer = engine.query( - QueryContext::new("What are the main factors affecting performance?") - .with_doc_ids(vec![doc_id.clone()]) - ).await?; - - if let Some(item) = answer.single() { - println!("Score: {:.2}", item.score); - println!("{}", item.content); - } - - // 4. Cleanup - engine.remove(&doc_id).await?; - - Ok(()) -} -``` diff --git a/docs/docs/features/cross-document-graph.mdx b/docs/docs/features/cross-document-graph.mdx deleted file mode 100644 index 1f940735..00000000 --- a/docs/docs/features/cross-document-graph.mdx +++ /dev/null @@ -1,88 +0,0 @@ ---- -sidebar_position: 3 ---- - -# Cross-Document Graph - -When multiple documents are indexed in a workspace, Vectorless automatically builds a relationship graph connecting documents by shared concepts. 
- -## Overview - -The cross-document graph represents documents as nodes and their relationships as weighted edges. Edge weights are computed from: - -- **Jaccard similarity** (60%) — Ratio of shared keywords to total unique keywords -- **Shared keyword count** (40%) — Absolute number of overlapping keywords - -```text -Document A ←── 0.72 ──→ Document B - │ │ - └── 0.45 ──→ Document C ←┘ -``` - -## How It Works - -### Graph Building - -After each indexing operation, the graph is automatically rebuilt as a background task: - -1. Extract keyword profiles from each document's reasoning index -2. Compute pairwise Jaccard similarity -3. Create edges for document pairs exceeding the similarity threshold -4. Store the graph in the workspace - -The graph builder uses keyword weights from the ReasoningIndex — keywords that appear in titles get 2.0× weight, summaries 1.5×, and content 1.0×. This ensures that structurally important keywords have more influence on the similarity calculation. - -### Accessing the Graph - -#### Python - -```python -graph = await engine.get_graph() - -if graph: - print(f"Documents: {graph.node_count()}") - print(f"Relationships: {graph.edge_count()}") - - # Get neighbors for a specific document - neighbors = graph.get_neighbors(doc_id) - for edge in neighbors: - print(f" → {edge.target_doc_id} (weight: {edge.weight:.2f})") -``` - -#### Rust - -```rust -if let Some(graph) = engine.get_graph().await? 
{ - println!("Documents: {}", graph.node_count()); - println!("Edges: {}", graph.edge_count()); - - for edge in graph.get_neighbors(&doc_id) { - println!("→ {} (weight: {:.2})", edge.target_doc_id, edge.weight); - } -} -``` - -## Graph Node Information - -Each document node contains: - -- **doc_id** — Document identifier -- **title** — Root node title -- **format** — Document format (markdown, pdf) -- **node_count** — Number of tree nodes -- **top_keywords** — Top weighted keywords from the reasoning index - -## Configuration - -The graph is built automatically with default thresholds. Advanced configuration is available in Rust: - -```text -min_shared_keywords: 3 — Minimum shared keywords to create an edge -min_keyword_jaccard: 0.1 — Minimum Jaccard similarity threshold -max_keywords_per_doc: 50 — Max keywords extracted per document -max_edges_per_node: 10 — Max edges per document node -``` - -## Current Status - -The graph is built and persisted during indexing. Graph-aware retrieval features (such as score boosting for connected documents) are planned for a future release. Currently, the graph serves as a relationship discovery and inspection tool accessible via the API. diff --git a/docs/docs/features/pdf-support.mdx b/docs/docs/features/pdf-support.mdx deleted file mode 100644 index 48682f22..00000000 --- a/docs/docs/features/pdf-support.mdx +++ /dev/null @@ -1,61 +0,0 @@ ---- -sidebar_position: 4 ---- - -# PDF Support - -Vectorless supports PDF documents with full page-level tracking and hierarchical structure extraction. 
- -## Basic Usage - -```python -from vectorless import Engine, IndexContext - -engine = Engine(api_key="sk-...", model="gpt-4o") - -# Index a PDF -result = await engine.index(IndexContext.from_path("./report.pdf")) -doc_id = result.doc_id - -# Query the PDF -answer = await engine.query( - QueryContext("What is discussed on page 5?").with_doc_ids([doc_id]) -) -print(answer.single().content) -``` - -## Page-Level Tracking - -Each tree node records the page range it spans: - -- **Leaf nodes** — Store the exact page(s) their content comes from -- **Branch nodes** — Page range is propagated from children (min start, max end) - -This enables: - -- **Page-scoped queries** — Filter results by page range -- **Context display** — Show which pages contributed to an answer -- **Citation** — Reference specific pages in results - -## PDF Structure Extraction - -The parser extracts structure from PDFs by analyzing: - -1. **Font size and weight** — Larger/bold text indicates headings -2. **Text position** — Top-of-page text often indicates section titles -3. **Spacing** — Paragraph breaks signal content boundaries - -Extracted sections are organized into a hierarchical tree, just like Markdown documents. - -## Limitations - -- **Scanned PDFs** — OCR is not built-in. 
Scanned/image-based PDFs require pre-processing -- **Complex layouts** — Multi-column layouts may not be perfectly structured -- **Tables** — Table content is extracted as text but loses cell structure -- **Images** — Image content is not analyzed - -## Best Practices - -- Use text-based PDFs for best results (not scanned documents) -- Larger documents (>50 pages) may take longer to index due to LLM summary generation -- Use incremental indexing when re-indexing updated PDFs to avoid redundant processing diff --git a/docs/docs/features/summary-strategies.mdx b/docs/docs/features/summary-strategies.mdx deleted file mode 100644 index a589a0f0..00000000 --- a/docs/docs/features/summary-strategies.mdx +++ /dev/null @@ -1,78 +0,0 @@ ---- -sidebar_position: 1 ---- - -# Summary Strategies - -Summaries are critical for retrieval quality. The Worker agent uses summaries in the NavigationIndex to decide which branches to explore and where to navigate. Without summaries, the Worker can only use node titles for decision-making, which significantly reduces accuracy. - -## Available Strategies - -### Full (Default) - -Generates summaries for every node in the tree. Branch nodes get navigation-oriented summaries ("what does this section cover"), while leaf nodes get content-oriented summaries ("what does this section say"). - -```rust -use vectorless::index::summary::SummaryStrategy; - -let strategy = SummaryStrategy::full(); -``` - -**Trade-off**: Highest token cost during indexing, but best retrieval quality. Recommended for production use. - -### Selective - -Only generates summaries for branch nodes (non-leaves) that exceed a token threshold. Useful when indexing large document sets on a budget. 
- -```rust -let strategy = SummaryStrategy::selective(100, true); -// min_tokens ↑ ↑ branch_only -``` - -- `min_tokens` — Minimum content tokens to generate a summary (default: 100) -- `branch_only` — Only generate for non-leaf nodes (default: true) - -**Trade-off**: Lower indexing cost, but leaf nodes lack summaries. The Worker falls back to title-only evaluation at leaf level. - -### Lazy - -Defers summary generation to query time. Summaries are generated on-demand when a node is first accessed during retrieval. - -```rust -let strategy = SummaryStrategy::lazy(true); -// ↑ persist to disk -``` - -**Trade-off**: Zero indexing cost for summaries, but adds latency to the first query that touches each node. Subsequent queries benefit from cached summaries. - -## Choosing a Strategy - -| Scenario | Recommended Strategy | -|----------|---------------------| -| Production, accuracy matters | **Full** | -| Large document set, budget-constrained | **Selective** (min_tokens=100) | -| One-time queries, minimal indexing time | **Lazy** | -| Batch indexing with later queries | **Full** (index once, query many times) | - -## How Summaries Are Used - -During retrieval, the Worker agent reads summary data from the NavigationIndex at each decision point: - -1. **`ls` output** — Child nodes show their descriptions (derived from summaries) and leaf counts -2. **Navigation decisions** — The LLM evaluates summaries to decide which branch to enter -3. **Keyword index** — Topic tags from summaries are indexed for `find` command lookups -4. **DocCards** — Root-level summaries power the Orchestrator's document selection - -When a node has no summary, the Worker's navigation quality degrades. This is why **Full** is the default — it ensures the Worker always has summary context to work with. 
- -## Navigation-Oriented Summaries - -Branch nodes receive structured summaries with three components: - -| Component | Purpose | Used By | -|-----------|---------|---------| -| **OVERVIEW** | 2-3 sentence routing summary | Worker's `ls` output | -| **QUESTIONS** | 3-5 typical questions this branch can answer | Keyword index | -| **TAGS** | 2-4 topic keywords | ReasoningIndex `find` command | - -This structured format enables the Worker to quickly assess whether a branch is worth exploring without reading its full content. diff --git a/docs/docs/features/synonym-expansion.mdx b/docs/docs/features/synonym-expansion.mdx deleted file mode 100644 index 9a88d901..00000000 --- a/docs/docs/features/synonym-expansion.mdx +++ /dev/null @@ -1,71 +0,0 @@ ---- -sidebar_position: 2 ---- - -# Synonym Expansion - -When users query with different wording than the document, keyword-based retrieval can miss relevant content. Synonym expansion addresses this by generating alternative search terms during indexing. - -## The Problem - -A document might use "revenue" throughout, but a user queries for "income" or "earnings". Without synonym expansion, the keyword strategy would miss these connections entirely. - -## How It Works - -During the reasoning index stage, the system: - -1. Selects the top-N keywords by frequency (capped at 20-100 keywords) -2. For each keyword, calls the LLM to generate up to 5 synonyms or related terms -3. Adds synonym entries to the topic index with **0.6x weight** (lower than direct keyword matches) - -```text -Keyword: "revenue" (weight: 1.0) - └── Synonym: "income" (weight: 0.6) - └── Synonym: "earnings" (weight: 0.6) - └── Synonym: "turnover" (weight: 0.6) -``` - -The lower weight ensures that direct keyword matches are always preferred over synonym matches, while still surfacing relevant content that would otherwise be missed. 
- -## Enabling Synonym Expansion - -### Python - -```python -from vectorless import IndexOptions - -# Enabled by default -opts = IndexOptions(enable_synonym_expansion=True) - -# Disable for faster indexing (at the cost of recall) -opts = IndexOptions(enable_synonym_expansion=False) -``` - -### Rust - -The synonym expansion is controlled via `ReasoningIndexConfig`: - -```rust -use vectorless::ReasoningIndexConfig; - -let config = ReasoningIndexConfig::default() - .with_synonym_expansion(true); -``` - -## Cost Impact - -Synonym expansion adds LLM calls during indexing (one per top keyword). For a typical document: - -- **Additional LLM calls**: 20-100 (depending on keyword count) -- **Additional tokens**: ~500-2000 (short prompt + response per keyword) -- **Indexing time increase**: 10-30 seconds - -The cost is paid once at index time. At query time, synonyms are already in the topic index, so there is **zero additional cost** during retrieval. - -## When to Disable - -Consider disabling synonym expansion when: - -- Your documents use highly domain-specific terminology with no common synonyms -- You need the fastest possible indexing time -- Your queries always use the same terminology as the documents diff --git a/docs/docs/getting-started.mdx b/docs/docs/getting-started.mdx index 6708b6d6..caa99a7a 100644 --- a/docs/docs/getting-started.mdx +++ b/docs/docs/getting-started.mdx @@ -1,45 +1,36 @@ --- -sidebar_position: 2 +sidebar_position: 1 --- -# Getting Started +# Welcome to Vectorless -## Prerequisites +**Vectorless** is a document understanding engine for AI. It compiles documents into structured trees of meaning, then dispatches multiple agents to reason through headings, sections, and paragraphs — evaluating how each part relates to the whole. The problem it solves is not "where to look", but "what does this mean in context". Every answer is a reasoning act, not a retrieval result. 
-- Python 3.9+ or Rust 1.75+ -- An LLM API key (OpenAI, or any OpenAI-compatible endpoint) +## How It Works -## Python SDK +1. **Parse** — Documents (Markdown, PDF) are parsed into hierarchical semantic trees, preserving structure and relationships between sections. +2. **Compile** — Trees are stored with metadata, keywords, and summaries. The pipeline resolves cross-references ("see Section 2.1") and expands keywords with LLM-generated synonyms for improved recall. Incremental compiling skips unchanged files via content fingerprinting. +3. **Ask** — An LLM-powered agent navigates the tree to find the most relevant sections. The Orchestrator coordinates multi-document queries, dispatching Workers that use `ls`, `cd`, `cat`, `find`, and `grep` commands to explore the tree and collect evidence. -### Installation - -```bash -pip install vectorless -``` - -### Index and Query +## Quick Start ```python import asyncio -from vectorless import Engine, IndexContext, QueryContext +from vectorless import Engine async def main(): - # Create an engine engine = Engine( api_key="sk-...", model="gpt-4o", ) - # Index a document - result = await engine.index(IndexContext.from_path("./report.pdf")) + # Compile a document + result = await engine.compile(path="./report.pdf") doc_id = result.doc_id - print(f"Indexed: {doc_id}") - # Query the document - answer = await engine.query( - QueryContext("What is the total revenue?").with_doc_ids([doc_id]) - ) - print(answer.single().content) + # Ask a question + response = await engine.ask("What is the total revenue?", doc_ids=[doc_id]) + print(response.single().content) asyncio.run(main()) ``` @@ -54,44 +45,14 @@ engine = Engine( ) ``` -## Rust Crate +### From Environment Variables -### Installation - -Add to your `Cargo.toml`: - -```toml -[dependencies] -vectorless = "0.1" +```python +engine = Engine.from_env() ``` -### Index and Query +### From Config File -```rust -use vectorless::{EngineBuilder, IndexContext, QueryContext}; - 
-#[tokio::main] -async fn main() -> vectorless::Result<()> { - let engine = EngineBuilder::new() - .with_key("sk-...") - .with_model("gpt-4o") - .build() - .await?; - - let result = engine.index(IndexContext::from_path("./report.pdf")).await?; - let doc_id = result.doc_id().unwrap(); - - let result = engine.query( - QueryContext::new("What is the total revenue?").with_doc_ids(vec![doc_id.to_string()]) - ).await?; - println!("{}", result.content); - - Ok(()) -} +```python +engine = Engine.from_config_file("./config.toml") ``` - -## Next Steps - -- [Architecture](/docs/architecture) — Understand the indexing and retrieval pipeline -- [Indexing Overview](/docs/indexing/overview) — Learn about each pipeline stage -- [Retrieval Strategies](/docs/retrieval/strategies) — Understand how queries are processed diff --git a/docs/docs/indexing/configuration.mdx b/docs/docs/indexing/configuration.mdx deleted file mode 100644 index 907d0b23..00000000 --- a/docs/docs/indexing/configuration.mdx +++ /dev/null @@ -1,88 +0,0 @@ ---- -sidebar_position: 2 ---- - -# Configuration - -This page covers the configurable options for the indexing pipeline. 
- -## IndexOptions (Python) - -```python -from vectorless import IndexOptions - -opts = IndexOptions( - mode="default", # "default", "force", "incremental" - generate_summaries=True, # Generate LLM summaries - generate_description=False, # Generate document description - include_text=True, # Include node text in tree - generate_ids=True, # Generate node IDs - enable_synonym_expansion=True, # Expand keywords with LLM synonyms -) -``` - -### Parameters - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `mode` | `str` | `"default"` | Indexing mode: `"default"`, `"force"`, or `"incremental"` | -| `generate_summaries` | `bool` | `True` | Generate LLM summaries for tree nodes | -| `generate_description` | `bool` | `False` | Generate a document-level description | -| `include_text` | `bool` | `True` | Store node text content in the tree | -| `generate_ids` | `bool` | `True` | Generate unique node IDs | -| `enable_synonym_expansion` | `bool` | `True` | Expand indexed keywords with LLM-generated synonyms | - -## PipelineOptions (Rust) - -```rust -use vectorless::index::{PipelineOptions, SummaryStrategy}; - -let options = PipelineOptions::default() - .with_summary_strategy(SummaryStrategy::full()); -``` - -## Summary Strategy - -Three strategies control how summaries are generated: - -| Strategy | Description | Token Cost | -|----------|-------------|------------| -| **Full** | Summarize every node | High | -| **Selective** | Only branch nodes above token threshold | Medium | -| **Lazy** | Generate on-demand at query time | Deferred | - -The default is **Full** since summaries are critical for Pilot navigation quality. 
- -```rust -use vectorless::index::summary::SummaryStrategy; - -// Full (default) -let strategy = SummaryStrategy::full(); - -// Selective — only branch nodes with >= 100 tokens -let strategy = SummaryStrategy::selective(100, true); - -// Lazy — generate at query time -let strategy = SummaryStrategy::lazy(true); -``` - -## Reasoning Index Config - -The reasoning index is configured via `ReasoningIndexConfig`: - -| Field | Default | Description | -|-------|---------|-------------| -| `enabled` | `true` | Enable reasoning index construction | -| `enable_synonym_expansion` | `true` | Expand keywords with LLM synonyms | -| `max_keyword_entries` | `5000` | Max keyword-to-node mappings | -| `max_topic_entries` | `20` | Max topic entries per keyword | -| `min_keyword_length` | `2` | Minimum keyword length to index | -| `build_summary_shortcut` | `true` | Build summary shortcut for overview queries | - -## Split Config - -| Field | Default | Description | -|-------|---------|-------------| -| `max_tokens_per_node` | `4000` | Token limit before splitting a leaf node | - -Lower values produce finer-grained nodes but increase tree size. The default of 4000 tokens balances retrieval precision with tree compactness. diff --git a/docs/docs/indexing/incremental.mdx b/docs/docs/indexing/incremental.mdx deleted file mode 100644 index 482cd2b5..00000000 --- a/docs/docs/indexing/incremental.mdx +++ /dev/null @@ -1,59 +0,0 @@ ---- -sidebar_position: 3 ---- - -# Incremental Indexing - -When documents change, re-indexing from scratch can be wasteful. Vectorless supports incremental indexing to avoid redundant LLM calls and processing. - -## Content Fingerprinting - -Every indexed document stores a content fingerprint (hash). When incremental mode is enabled: - -1. Compute the fingerprint of the new document content -2. Compare against the stored fingerprint -3. If identical, skip reprocessing entirely -4. 
If changed, reprocess only the affected parts - -## Usage - -### Python - -```python -from vectorless import IndexContext, IndexOptions - -# Only re-index if content changed -ctx = IndexContext.from_path("./report.pdf").with_options( - IndexOptions(mode="incremental") -) -result = await engine.index(ctx) -``` - -### Rust - -```rust -use vectorless::{IndexContext, IndexMode}; - -let ctx = IndexContext::from_path("./report.pdf") - .with_mode(IndexMode::Incremental); -let result = engine.index(ctx).await?; -``` - -## Indexing Modes - -| Mode | Behavior | -|------|----------| -| `default` | Skip if already indexed (by document ID) | -| `incremental` | Re-index only if content fingerprint changed | -| `force` | Always re-index, overwriting existing data | - -## What Gets Reused - -When a document is incrementally re-indexed: - -- **Summaries** — Reused for nodes whose content hasn't changed -- **Reasoning index** — Keyword mappings and synonym expansions are preserved -- **Cross-references** — Re-resolved against the updated tree -- **Metadata** — Page ranges, token counts recalculated - -This can reduce indexing time by 60-80% for documents with minor edits. diff --git a/docs/docs/indexing/overview.mdx b/docs/docs/indexing/overview.mdx deleted file mode 100644 index dbc93256..00000000 --- a/docs/docs/indexing/overview.mdx +++ /dev/null @@ -1,223 +0,0 @@ ---- -sidebar_position: 1 ---- - -# Indexing Pipeline - -The compile pipeline transforms raw documents into hierarchical tree structures with pre-computed navigation indexes, ready for Agent-driven retrieval. 
- -## Pipeline Overview - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ Compile Pipeline │ -│ │ -│ Document ──→ Parse ──→ Build ──→ Validate ──→ Split │ -│ (md/pdf) (10) (20) (22) (25) │ -│ │ -│ ──→ Enhance ──→ Enrich ──→ ReasoningIndex (45) │ -│ (30) (40) │ -│ │ -│ ──→ NavigationIndex ──→ Optimize │ -│ (50) (60) │ -│ │ -│ Output: DocumentTree + ReasoningIndex + NavigationIndex │ -└─────────────────────────────────────────────────────────────────┘ -``` - -Numbers in parentheses are stage priorities — lower values execute first. Stages at the same priority level run in parallel when their dependency graph allows it. - ---- - -## Stage Details - -### Parse (Priority 10) - -Parses raw documents into `RawNode` structures, preserving the source hierarchy. - -| Format | Strategy | -|--------|----------| -| **Markdown** | Splits by heading levels (`#`, `##`, `###`). Each heading becomes a node, content between headings becomes node content. | -| **PDF** | Extracts text per page, groups into sections by layout analysis and font-size heuristics. | - -Each `RawNode` carries: title, content, heading level, line range, page number, and an estimated token count. - -### Build (Priority 20) - -Constructs an arena-based [`DocumentTree`](https://docs.rs/vectorless) from the parsed raw nodes. - -- Creates parent-child relationships based on heading level nesting -- Applies **thinning** — collapses single-child chains where a parent has exactly one child and no meaningful content of its own. This reduces unnecessary tree depth -- Assigns sequential node IDs and hierarchical structure indices (e.g. 
`"1"`, `"1.2"`, `"1.2.3"`) - -### Validate (Priority 22, optional) - -Checks tree integrity before expensive LLM stages run: - -- No orphaned nodes (every node reachable from root) -- Consistent depth values (child depth = parent depth + 1) -- Valid parent-child relationships in the arena - -Failures at this stage prevent broken trees from propagating downstream. - -### Split (Priority 25, optional) - -Splits oversized leaf nodes that exceed the token threshold (default: 4000 tokens). - -- Finds paragraph boundaries or heading-like patterns for clean splits -- Each split becomes a new leaf node under the same parent -- Preserves semantic coherence — splits don't break mid-sentence - -### Enhance (Priority 30) - -Generates LLM summaries for tree nodes. This is the most expensive stage (LLM API calls) and produces data that all downstream indexes consume. - -**Leaf nodes** receive content-oriented summaries: - -> *What does this section say?* - -**Non-leaf (branch) nodes** receive structured navigation output with three components: - -| Component | Purpose | Consumed by | -|-----------|---------|-------------| -| **OVERVIEW** | 2-3 sentence routing summary ("what this branch covers") | `NavEntry.overview` | -| **QUESTIONS** | 3-5 typical questions this branch can answer | `NavEntry.question_hints` | -| **TAGS** | 2-4 topic keywords for fast-path matching | `NavEntry.topic_tags` | - -Example LLM output for a non-leaf node: - -``` -OVERVIEW: Covers the Wix payment ecosystem, spanning payment processing, account management, and financial operations. -QUESTIONS: How to set up Wix Payments?, What payout schedules are available?, How to handle chargebacks? 
-TAGS: payments, billing, invoices, refunds -``` - -The stage supports four strategies: - -| Strategy | Behavior | -|----------|----------| -| **Full** | Summarize every node (default) | -| **Selective** | Only branch nodes above a token threshold | -| **Lazy** | Generate summaries on-demand at query time | -| **None** | Skip LLM summaries entirely | - -All LLM calls run concurrently with configurable concurrency limits, and results are cached via the [MemoStore](../features/summary-strategies.mdx) for incremental re-indexing. - -### Enrich (Priority 40) - -Adds structural metadata to the tree — no LLM calls, pure computation: - -- **Page ranges** — Propagates page boundaries from leaves up to parent nodes, enabling page-level citation -- **Token statistics** — Calculates total tokens and node counts per subtree -- **Cross-reference resolution** — Parses inline references like "see Section 2.1" or "refer to Appendix G" and resolves them to actual `NodeId`s in the tree -- **Document description** — Generates a description from the root summary - -### Reasoning Index (Priority 45) - -Builds a [`ReasoningIndex`](https://docs.rs/vectorless) — a flat keyword-to-node mapping optimized for traditional retrieval: - -| Field | Description | -|-------|-------------| -| `topic_paths` | Keywords → nodes with weighted mappings. Title keywords get 2.0×, summary 1.5×, content 1.0× | -| `summary_shortcut` | Pre-computed document overview for "what is this about" queries | -| `section_map` | Depth-1 section titles → `NodeId` for fast ToC lookup | -| `hot_nodes` | Frequently retrieved nodes tracked over time | - -Optionally expands top keywords with **LLM-generated synonyms** at 0.6× weight, enabling fuzzy keyword matching without vector embeddings. - -### Navigation Index (Priority 50) - -Builds a [`NavigationIndex`](https://docs.rs/vectorless) — the primary data source for the retrieval Agent. 
This stage is pure data organization: no LLM calls, it restructures data produced by the Enhance stage. - -For every non-leaf node, it creates: - -**`NavEntry`** — routing metadata for the Agent to decide "should I enter this branch?" - -```rust -pub struct NavEntry { - pub overview: String, // Routing summary from Enhance - pub question_hints: Vec, // Typical questions from Enhance - pub topic_tags: Vec, // Topic keywords from Enhance - pub leaf_count: usize, // Total leaves in this subtree - pub level: usize, // Depth in tree (root = 0) -} -``` - -**`ChildRoute`** — compact routing info for one child, enabling progressive disclosure: - -```rust -pub struct ChildRoute { - pub node_id: NodeId, - pub title: String, - pub description: String, // One-sentence routing description - pub leaf_count: usize, // Leaves in this child's subtree -} -``` - -The Agent reads `child_routes` at each decision point to see all available sub-topics and their descriptions, then chooses where to navigate next — without accessing the content layer. - -This design is the in-memory equivalent of the [SKILL.md / INDEX.md files](https://arxiv.org/abs/2604.14572) described in the Corpus2Skill paper. 
- -### Optimize (Priority 60, optional) - -Final tree structure optimizations: - -- **Merge small leaves** — Adjacent sibling leaves below the token threshold are merged into a single node, with content prefixed by `## Title` to preserve boundaries -- **Remove empty intermediates** — Non-leaf nodes with no content and exactly one child are marked for removal (collapsing the chain) - ---- - -## Data Flow - -``` -Document (md/pdf) - │ - ▼ - ┌─────────┐ ┌──────────────┐ ┌─────────────────┐ - │ TreeNode │ │ReasoningIndex│ │NavigationIndex │ - │ (content) │ │ (keyword → │ │ (Node → NavEntry│ - │ │ │ node map) │ │ + ChildRoutes) │ - └────┬─────┘ └──────┬───────┘ └───────┬─────────┘ - │ │ │ - │ Retrieved by │ Retrieved by │ - │ content │ keyword lookup │ Agent navigation - │ collection │ │ - ▼ ▼ ▼ - ┌──────────────────────────────────────────────────────┐ - │ Retrieval Phase │ - │ Agent reads NavigationIndex to decide where to go, │ - │ then reads TreeNode.content only when needed. │ - └──────────────────────────────────────────────────────┘ -``` - ---- - -## Usage - -```python -from vectorless import Engine - -engine = Engine.builder().build() - -# Compile a document (runs all stages) -result = engine.compile("./docs/") - -# Access the indexes -print(f"Tree nodes: {result.node_count()}") -print(f"Reasoning index keywords: {result.keyword_count()}") -print(f"Navigation entries: {result.nav_entry_count()}") -``` - -```rust -use vectorless::client::EngineBuilder; - -let engine = EngineBuilder::new().build()?; -let result = engine.compile("./docs/").await?; - -println!("Tree nodes: {}", result.node_count()); -println!("Nav entries: {}", result.nav_entry_count()); -``` - -## Configuration - -See [Indexing Configuration](./configuration.mdx) for all available options including summary strategies, token thresholds, and concurrency settings. 
diff --git a/docs/docs/installation.mdx b/docs/docs/installation.mdx new file mode 100644 index 00000000..15c23aaf --- /dev/null +++ b/docs/docs/installation.mdx @@ -0,0 +1,23 @@ +--- +sidebar_position: 2 +--- + +# Installation + +## Prerequisites + +- Python 3.9+ +- An LLM API key (OpenAI, or any OpenAI-compatible endpoint) + +## Install + +```bash +pip install vectorless +``` + +## Verify + +```python +import vectorless +print(vectorless.__version__) +``` diff --git a/docs/docs/intro.mdx b/docs/docs/intro.mdx deleted file mode 100644 index eb13c61c..00000000 --- a/docs/docs/intro.mdx +++ /dev/null @@ -1,85 +0,0 @@ ---- -sidebar_position: 1 ---- - -# Introduction - -**Vectorless** is a document understanding engine for AI. It compiles documents into structured trees of meaning, then dispatches multiple agents to reason through headings, sections, and paragraphs — evaluating how each part relates to the whole. The problem it solves is not "where to look", but "what does this mean in context". Every answer is a reasoning act, not a retrieval result. - -## How It Works - -1. **Parse** — Documents (Markdown, PDF) are parsed into hierarchical semantic trees, preserving structure and relationships between sections. -2. **Index** — Trees are stored with metadata, keywords, and summaries. The pipeline resolves cross-references ("see Section 2.1") and expands keywords with LLM-generated synonyms for improved recall. Incremental indexing skips unchanged files via content fingerprinting. -3. **Query** — An LLM-powered agent navigates the tree to find the most relevant sections. The Orchestrator coordinates multi-document queries, dispatching Workers that use `ls`, `cd`, `cat`, `find`, and `grep` commands to explore the tree and collect evidence. 
- -## Quick Start - -### Python - -```bash -pip install vectorless -``` - -```python -import asyncio -from vectorless import Engine, IndexContext, QueryContext - -async def main(): - engine = Engine( - api_key="sk-...", - model="gpt-4o", - ) - - result = await engine.index(IndexContext.from_path("./report.pdf")) - doc_id = result.doc_id - - answer = await engine.query( - QueryContext("What is the total revenue?").with_doc_ids([doc_id]) - ) - print(answer.single().content) - -asyncio.run(main()) -``` - -### Rust - -```toml -[dependencies] -vectorless = "0.1" -``` - -```rust -use vectorless::{EngineBuilder, IndexContext, QueryContext}; - -#[tokio::main] -async fn main() -> vectorless::Result<()> { - let engine = EngineBuilder::new() - .with_key("sk-...") - .with_model("gpt-4o") - .build() - .await?; - - let result = engine.index(IndexContext::from_path("./report.pdf")).await?; - let doc_id = result.doc_id().unwrap(); - - let result = engine.query( - QueryContext::new("What is the total revenue?").with_doc_ids(vec![doc_id.to_string()]) - ).await?; - println!("{}", result.content); - - Ok(()) -} -``` - -## Features - -- **Hierarchical Semantic Trees** — Preserves document structure, not flat chunks -- **LLM-Powered Agent Navigation** — Worker agents navigate the tree using commands (ls, cd, cat, find, grep), making every decision through LLM reasoning -- **Cross-Reference Resolution** — Automatically resolves "see Section 2.1", "Appendix G" references during indexing -- **Synonym Expansion** — LLM-generated synonyms for indexed keywords improve recall for differently-worded queries -- **Orchestrator Supervisor Loop** — Multi-document queries are coordinated by an LLM supervisor that dispatches Workers, evaluates evidence, and replans when needed -- **Cross-Document Graph** — Automatic relationship discovery between documents via shared keywords -- **Incremental Indexing** — Content fingerprinting skips unchanged files -- **DocCard Catalog** — Lightweight document 
metadata index enables fast multi-document analysis without loading full documents -- **Multi-Format** — Markdown and PDF support -- **Zero Infrastructure** — Just an LLM API key, nothing else to deploy diff --git a/docs/docs/retrieval/cross-references.mdx b/docs/docs/retrieval/cross-references.mdx deleted file mode 100644 index 6ec4b7d2..00000000 --- a/docs/docs/retrieval/cross-references.mdx +++ /dev/null @@ -1,71 +0,0 @@ ---- -sidebar_position: 4 ---- - -# Cross-Reference Navigation - -Documents often contain internal references like "see Section 2.1" or "refer to Appendix G". Vectorless automatically extracts and resolves these references, enabling the retrieval engine to follow them during search. - -## How It Works - -### Extraction - -During the enrich stage, the `ReferenceExtractor` scans node content for reference patterns: - -| Pattern | Example | Reference Type | -|---------|---------|---------------| -| `Section X.Y` | "see Section 2.1" | Section | -| `Chapter X` | "Chapter 3" | Section | -| `Appendix X` | "Appendix G" | Appendix | -| `Table X.Y` | "Table 5.3" | Table | -| `Figure X.Y` | "Figure 2.1" | Figure | -| `Equation X.Y` | "Equation 2.3" | Equation | -| `Page X` | "see page 42" | Page | - -### Resolution - -Extracted references are resolved to actual `NodeId`s in the tree: - -- **Section references** — Matched by structure index (e.g., "2.1" → node with structure "2.1") -- **Appendix references** — Matched by title ("Appendix G") -- **Table/Figure references** — Matched by title substring -- **Page references** — Matched via the page index - -Resolved references are stored on the node with a confidence score. 
- -### Search Integration - -During retrieval, when the search algorithm expands a node's children, it also includes resolved reference targets: - -```text -Node: "Results Overview" -├── Child: "Performance Metrics" -├── Child: "Comparison" -└── Reference → "Appendix A: Raw Data" ← followed during search -``` - -This means the search engine can jump from a section that references an appendix directly to that appendix, even though it's not a direct child in the tree. - -## Supported Reference Types - -- `Section` — Matches section numbers like "2.1", "3.2.1" -- `Appendix` — Matches lettered appendices like "A", "G" -- `Table` — Matches table numbers like "5.3" -- `Figure` — Matches figure numbers like "2.1" -- `Equation` — Matches equation numbers -- `Page` — Matches page numbers - -## Example - -Given this document structure: - -```text -1. Introduction -2. Results - 2.1. Performance - 2.2. Analysis (content: "see Appendix A for raw data") -3. Conclusion -Appendix A: Raw Data -``` - -When the search reaches "2.2 Analysis", it will also have "Appendix A" as a candidate thanks to the resolved cross-reference. If the query asks about "raw data", the search can jump directly to the appendix. diff --git a/docs/docs/retrieval/overview.mdx b/docs/docs/retrieval/overview.mdx deleted file mode 100644 index 79ca9ab6..00000000 --- a/docs/docs/retrieval/overview.mdx +++ /dev/null @@ -1,61 +0,0 @@ ---- -sidebar_position: 1 ---- - -# Retrieval Overview - -The retrieval pipeline transforms a user query into relevant document content by navigating the hierarchical tree structure with LLM-guided reasoning. 
- -## Pipeline Phases - -```text -Query ──▶ Understand ──▶ Orchestrate ──▶ Navigate ──▶ Evaluate ──▶ Result - │ │ │ │ - ▼ ▼ ▼ ▼ - QueryPlan DocCards + ls/cd/cat/ Evidence - Intent + Dispatch find/grep sufficiency - Concepts Workers check -``` - -### Understand - -- LLM analyzes the query to extract intent, key concepts, and strategy hints -- Produces a `QueryPlan` that guides the entire retrieval process -- Extracts BM25 keywords for index lookup and evidence scoring - -### Orchestrate - -- LLM reviews DocCard metadata (lightweight, no full document loading) -- Selects relevant documents and assigns specific tasks to each -- When user specifies doc_ids directly, skips analysis and dispatches immediately -- Fan-out Workers in parallel — one per document - -### Navigate - -Each Worker navigates its assigned document through a command loop: - -1. **Bird's-eye** — `ls` at root to see the document structure -2. **Plan** — LLM generates a navigation plan using keyword index hits -3. **Command loop** — LLM picks a command (`ls`, `cd`, `cat`, `find`, `grep`, etc.), executes it, observes the result, and repeats -4. 
**Collect evidence** — `cat` automatically saves node content as evidence - -### Evaluate - -- Workers can self-evaluate with `check` — an LLM assesses evidence sufficiency -- Orchestrator evaluates overall evidence across all Workers -- If insufficient, the Orchestrator triggers a replan and dispatches additional Workers - -### Result - -- Deduplicate and rank collected evidence by BM25 relevance score -- Return original document text with source attribution — no LLM synthesis - -## Quick Selection Guide - -| Use Case | Flow | -|----------|------| -| Single document, specific question | Worker dispatched directly → navigate → collect evidence | -| Single document, broad exploration | Worker with navigation plan → multi-round exploration | -| Multiple documents | Orchestrator analyzes DocCards → dispatches Workers per document | -| Workspace-wide query | Orchestrator reviews all DocCards → selects relevant documents | -| Specified doc_ids | Skip Orchestrator analysis → direct Worker dispatch | diff --git a/docs/docs/retrieval/search-algorithms.mdx b/docs/docs/retrieval/search-algorithms.mdx deleted file mode 100644 index 5482133f..00000000 --- a/docs/docs/retrieval/search-algorithms.mdx +++ /dev/null @@ -1,119 +0,0 @@ ---- -sidebar_position: 3 ---- - -# Worker Navigation Commands - -Workers navigate document trees using a set of commands that mimic filesystem operations. Each command is selected by the LLM based on the current context, collected evidence, and navigation plan. 
- -## Command Overview - -| Command | Purpose | Collects Evidence | -|---------|---------|-------------------| -| **`ls`** | List children at current position | No | -| **`cd `** | Navigate into a child node | No | -| **`cd ..`** | Navigate back to parent | No | -| **`cat `** | Read node content | **Yes** | -| **`head `** | Preview first N lines | No | -| **`find `** | Search ReasoningIndex | No | -| **`findtree `** | Search by title pattern | No | -| **`grep `** | Regex search subtree content | No | -| **`wc `** | Show content size | No | -| **`pwd`** | Show current path | No | -| **`check`** | Evaluate evidence sufficiency | No | -| **`done`** | End navigation | No | - -## Navigation Strategy - -Workers follow a priority-ordered strategy for efficient navigation: - -### 1. Keyword-First (Preferred) - -When the ReasoningIndex has keyword matches for the query, Workers use `find` to jump directly to relevant sections: - -```text -Keyword matches available: - 'revenue' → root/Financial Statements/Revenue (weight 0.85) - -Worker: find revenue -Result: Found in "Revenue" section at depth 2 -Worker: cd "Financial Statements" -Worker: cd Revenue -Worker: cat . -``` - -This avoids manual tree traversal and is the fastest path to relevant content. - -### 2. Manual Exploration - -When no keyword hints are available, Workers explore the tree manually: - -```text -Worker: ls -Result: [1] Introduction (3 leaves), [2] Architecture (5 leaves), [3] Performance (4 leaves) -Worker: cd Architecture -Worker: ls -Result: [1] Overview (2 leaves), [2] Components (3 leaves) -``` - -### 3. 
Title Search - -When the section name is known but not the exact path: - -```text -Worker: findtree performance -Result: Matches: "Performance" (depth 1), "Performance Metrics" (depth 2) -``` - -## Target Resolution - -When a Worker issues `cd`, `cat`, or `head` with a target name, the system resolves it using multi-level matching: - -| Priority | Match Type | Example | -|----------|-----------|---------| -| 1 | Exact title match | `"Revenue"` → Revenue | -| 2 | Case-insensitive match | `"revenue"` → Revenue | -| 3 | Substring (contains) match | `"rev"` → Revenue | -| 4 | Numeric index | `"1"` → first child | - -## Evidence Collection - -The `cat` command is the primary evidence collection mechanism: - -- `cat ` — Read a child node's content and save as evidence -- `cat` (no argument) — Read the current node's content (useful at leaf nodes) -- Evidence is automatically deduplicated — the Worker tracks visited nodes and avoids re-reading - -## Self-Evaluation - -The `check` command triggers an LLM-based sufficiency evaluation: - -```text -Worker: check -LLM evaluates: "Is the collected evidence sufficient to answer the query?" -Response: SUFFICIENT — evidence contains revenue figures matching the query -Worker: done -``` - -If evidence is insufficient, the response includes what's missing, triggering a dynamic re-plan. - -## Dynamic Re-planning - -After `check` finds insufficient evidence, the Worker generates a new navigation plan: - -1. LLM reviews the query, current evidence, and missing information -2. Generates an updated navigation plan targeting the gaps -3. Worker follows the new plan in subsequent rounds - -This allows Workers to adapt their strategy when initial plans don't yield sufficient results. 
- -## Budget Controls - -Workers operate within configurable budgets: - -| Parameter | Default | Description | -|-----------|---------|-------------| -| `max_rounds` | 8 | Maximum navigation rounds | -| `max_llm_calls` | 12 | Maximum LLM calls per Worker | - -These prevent runaway navigation loops while giving Workers enough room for multi-hop exploration. diff --git a/docs/docs/retrieval/strategies.mdx b/docs/docs/retrieval/strategies.mdx deleted file mode 100644 index fc4e1d45..00000000 --- a/docs/docs/retrieval/strategies.mdx +++ /dev/null @@ -1,87 +0,0 @@ ---- -sidebar_position: 2 ---- - -# Retrieval Strategies - -Vectorless uses a unified agent-based retrieval approach where the Orchestrator and Workers coordinate through LLM reasoning. The strategy adapts automatically based on query scope and complexity. - -## Strategy Overview - -| Mode | When | LLM Calls | Description | -|------|------|-----------|-------------| -| **Direct Dispatch** | User specifies doc_ids | Medium | Skip Orchestrator analysis, dispatch Worker directly | -| **Single-Document** | One relevant document found | Medium | Orchestrator analyzes, dispatches one Worker | -| **Multi-Document** | Multiple relevant documents | High | Orchestrator selects docs, dispatches parallel Workers | -| **Workspace** | No scope specified | High | Orchestrator reviews all DocCards, selects relevant docs | - -## Direct Dispatch - -When the user specifies document IDs in the query context, the system skips Orchestrator analysis and dispatches Workers directly to each specified document. - -```python -from vectorless import QueryContext - -answer = await engine.query( - QueryContext("What is the total revenue?").with_doc_ids([doc_id]) -) -``` - -This is the fastest path — no DocCard analysis LLM call, just direct navigation. - -## Single-Document Retrieval - -The Orchestrator analyzes the query and available DocCards, determines one document is relevant, and dispatches a single Worker to navigate it. 
- -```python -answer = await engine.query( - QueryContext("What are the growth trends?").with_doc_ids([doc_id]) -) -``` - -The Worker follows a plan-navigate-evaluate loop: -1. Generate a navigation plan from keyword index hits -2. Navigate the tree using commands (ls, cd, cat, find, grep) -3. Self-evaluate evidence sufficiency with `check` -4. Return collected evidence - -## Multi-Document Retrieval - -The Orchestrator identifies multiple relevant documents and dispatches Workers in parallel — one per document, each with a specific sub-task. - -```python -answer = await engine.query( - QueryContext("Compare quarterly revenue across reports").with_doc_ids([doc_id_1, doc_id_2]) -) -``` - -The Orchestrator's supervisor loop: -1. **Analyze** — LLM reviews DocCards and creates a dispatch plan with per-document tasks -2. **Dispatch** — Fan-out Workers in parallel -3. **Evaluate** — LLM checks if combined evidence is sufficient -4. **Replan** (if insufficient) — LLM identifies gaps and dispatches additional Workers - -## Workspace Retrieval - -When no document scope is specified, the Orchestrator reviews all indexed documents via the lightweight DocCard catalog. - -```python -answer = await engine.query( - QueryContext("What documents discuss performance?") -) -``` - -The DocCard catalog (`catalog.bin`) stores lightweight metadata for each document — enabling fast analysis without loading full document trees. This is critical for workspaces with many documents. 
- -## Query Understanding - -Every query (regardless of mode) passes through LLM-based understanding that produces a `QueryPlan`: - -| Field | Description | -|-------|-------------| -| **Intent** | Factual, Analytical, Navigational, or Summary | -| **Strategy Hint** | focused, exploratory, comparative, or summary | -| **Key Concepts** | LLM-extracted concepts from the query | -| **Keywords** | BM25 keywords for index lookup and evidence scoring | - -The QueryPlan guides Worker navigation — for example, a "factual" intent with a "focused" strategy hint tells the Worker to look for a specific answer rather than exploring broadly. diff --git a/docs/docs/sdk/python.mdx b/docs/docs/sdk/python.mdx deleted file mode 100644 index d26b6f4e..00000000 --- a/docs/docs/sdk/python.mdx +++ /dev/null @@ -1,159 +0,0 @@ ---- -sidebar_position: 1 ---- - -# Python SDK - -The Python SDK provides an async API built on PyO3 for high-performance integration with Python applications. - -## Installation - -```bash -pip install vectorless -``` - -## Engine - -The `Engine` is the main entry point. It requires an LLM API key and model name. 
- -```python -from vectorless import Engine - -engine = Engine( - api_key="sk-...", # LLM API key - model="gpt-4o", # LLM model name - endpoint=None, # Optional: custom API endpoint -) -``` - -## Indexing - -### From a File - -```python -from vectorless import IndexContext, IndexOptions - -result = await engine.index(IndexContext.from_path("./report.pdf")) -print(result.doc_id) # Document ID for querying -``` - -### From Multiple Files - -```python -result = await engine.index(IndexContext.from_paths(["./a.pdf", "./b.md"])) -``` - -### From a Directory - -```python -result = await engine.index(IndexContext.from_dir("./documents/")) -``` - -### From Text Content - -```python -result = await engine.index( - IndexContext.from_content("# Title\n\nContent...", "markdown").with_name("my-doc") -) -``` - -### With Options - -```python -result = await engine.index( - IndexContext.from_path("./report.pdf").with_options( - IndexOptions( - mode="force", - generate_summaries=True, - enable_synonym_expansion=True, - ) - ) -) -``` - -## Querying - -### Single Document - -```python -from vectorless import QueryContext - -answer = await engine.query( - QueryContext("What is the total revenue?") - .with_doc_ids([doc_id]) -) - -if answer.single(): - print(answer.single().content) - print(f"Score: {answer.single().score}") -``` - -### Multiple Documents - -```python -answer = await engine.query( - QueryContext("Compare the approaches") - .with_doc_ids(["doc-1", "doc-2"]) -) -``` - -### Workspace Query - -```python -answer = await engine.query( - QueryContext("What documents discuss performance?") -) -``` - -### Query Options - -```python -answer = await engine.query( - QueryContext("Explain the architecture") - .with_doc_ids([doc_id]) - .with_timeout_secs(60) # Per-operation timeout - .with_force_analysis(True) # Force Orchestrator analysis -) -``` - -## Document Management - -```python -# List all indexed documents -docs = await engine.list() -for doc in docs: - print(f"{doc.id}: 
{doc.name} ({doc.format})") - -# Check if a document exists -exists = await engine.exists(doc_id) - -# Remove a document -removed = await engine.remove(doc_id) - -# Remove all documents -count = await engine.clear() -``` - -## Document Graph - -```python -graph = await engine.get_graph() -if graph: - print(f"Nodes: {graph.node_count()}, Edges: {graph.edge_count()}") - neighbors = graph.get_neighbors(doc_id) - for edge in neighbors: - print(f" → {edge.target_doc_id} (weight: {edge.weight:.2f})") -``` - -## API Reference - -### IndexOptions - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `mode` | `str` | `"default"` | Indexing mode | -| `generate_summaries` | `bool` | `True` | Generate LLM summaries | -| `generate_description` | `bool` | `False` | Generate document description | -| `include_text` | `bool` | `True` | Include node text | -| `generate_ids` | `bool` | `True` | Generate node IDs | -| `enable_synonym_expansion` | `bool` | `True` | LLM synonym expansion | diff --git a/docs/docs/sdk/rust.mdx b/docs/docs/sdk/rust.mdx deleted file mode 100644 index c9e3efc1..00000000 --- a/docs/docs/sdk/rust.mdx +++ /dev/null @@ -1,123 +0,0 @@ ---- -sidebar_position: 2 ---- - -# Rust Crate - -The Rust crate provides the core engine with full control over the indexing and retrieval pipeline. 
- -## Installation - -```toml -[dependencies] -vectorless = "0.1" -``` - -## Engine - -```rust -use vectorless::{Engine, EngineBuilder}; - -let engine = EngineBuilder::new() - .with_key("sk-...") - .with_model("gpt-4o") - .with_endpoint("https://api.openai.com/v1") // optional - .build() - .await?; -``` - -## Indexing - -```rust -use vectorless::{IndexContext, IndexOptions, IndexMode}; - -// From a file -let result = engine.index(IndexContext::from_path("./report.pdf")).await?; -let doc_id = result.doc_id().unwrap(); - -// From content -let result = engine.index( - IndexContext::from_content("# Title\n\nContent...", DocumentFormat::Markdown) -).await?; - -// With options -let opts = IndexOptions::new() - .with_mode(IndexMode::Force); -let result = engine.index( - IndexContext::from_path("./report.pdf").with_options(opts) -).await?; -``` - -## Querying - -```rust -use vectorless::QueryContext; - -let result = engine.query( - QueryContext::new("What is the total revenue?") - .with_doc_ids(vec![doc_id.to_string()]) - .with_timeout_secs(60) -).await?; - -if let Some(item) = result.single() { - println!("Score: {:.2}", item.score); - println!("Content: {}", item.content); -} -``` - -## Document Management - -```rust -// List documents -for doc in engine.list().await? { - println!("{}: {} ({})", doc.id, doc.name, doc.format); -} - -// Check existence -let exists = engine.exists(&doc_id).await?; - -// Remove -let removed = engine.remove(&doc_id).await?; - -// Clear all -let count = engine.clear().await?; -``` - -## Document Graph - -```rust -if let Some(graph) = engine.get_graph().await? 
{ - println!("Nodes: {}", graph.node_count()); - println!("Edges: {}", graph.edge_count()); - - for edge in graph.get_neighbors(&doc_id) { - println!("→ {} (weight: {:.2})", edge.target_doc_id, edge.weight); - } -} -``` - -## Advanced Configuration - -### Summary Strategy - -```rust -use vectorless::index::summary::SummaryStrategy; - -// Full summaries (default) -let strategy = SummaryStrategy::full(); - -// Selective — only branch nodes with >= 100 tokens -let strategy = SummaryStrategy::selective(100, true); - -// Lazy — generate at query time -let strategy = SummaryStrategy::lazy(true); -``` - -### Pipeline Options - -```rust -use vectorless::index::PipelineOptions; - -let options = PipelineOptions::default(); -// Pass to indexer via the engine builder or context -``` diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index 76f4f877..758f9bd4 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -73,10 +73,7 @@ const config: Config = { target: '_self' // This makes the logo click follow the link in the same window }, items: [ - {to: '/docs/sdk/python', label: 'Python', position: 'left'}, - {to: '/docs/sdk/rust', label: 'Rust', position: 'left'}, - {to: '/docs/intro', label: 'Documentation', position: 'left'}, - // {to: '/blog', label: 'Blog', position: 'left'}, + {to: '/docs/getting-started', label: 'Documentation', position: 'left'}, ], }, prism: { diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 4b5877c2..1c2d1db8 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -2,46 +2,15 @@ import type {SidebarsConfig} from '@docusaurus/plugin-content-docs'; const sidebars: SidebarsConfig = { tutorialSidebar: [ - 'intro', - 'getting-started', - 'architecture', - { - type: 'category', - label: 'Indexing', - items: [ - 'indexing/overview', - 'indexing/configuration', - 'indexing/incremental', - ], - }, - { - type: 'category', - label: 'Retrieval', - items: [ - 'retrieval/overview', - 'retrieval/strategies', - 
'retrieval/search-algorithms', - 'retrieval/cross-references', - ], - }, { type: 'category', - label: 'Features', + label: 'Get Started', items: [ - 'features/summary-strategies', - 'features/synonym-expansion', - 'features/cross-document-graph', - 'features/pdf-support', - ], - }, - { - type: 'category', - label: 'SDK', - items: [ - 'sdk/python', - 'sdk/rust', + 'getting-started', + 'installation', ], }, + 'architecture', { type: 'category', label: 'RFC', @@ -51,15 +20,6 @@ const sidebars: SidebarsConfig = { ], }, 'api-reference', - { - type: 'category', - label: 'Examples', - items: [ - 'examples/quick-query', - 'examples/multi-document', - 'examples/batch-indexing', - ], - }, ], }; diff --git a/docs/src/css/custom.css b/docs/src/css/custom.css index e808cee0..9c5bb996 100644 --- a/docs/src/css/custom.css +++ b/docs/src/css/custom.css @@ -137,6 +137,12 @@ nav.navbar, color: var(--primary) !important; } +/* ===== Homepage Code Syntax Highlighting ===== */ +.kw { color: #FF7B72; font-weight: 600; } +.fn { color: #D2A8FF; } +.str { color: #A5D6FF; } +.cmt { color: #8B949E; font-style: italic; } + /* ===== Prevent scroll on homepage ===== */ html:has(.heroBanner) { overflow: hidden; diff --git a/docs/src/pages/index.module.css b/docs/src/pages/index.module.css index 233e81e5..c1d265ef 100644 --- a/docs/src/pages/index.module.css +++ b/docs/src/pages/index.module.css @@ -1,22 +1,24 @@ /** * Homepage styles for Vectorless. - * Uses rose color scheme tokens from custom.css. 
*/ /* ===== Hero Banner ===== */ .heroBanner { margin: 0; padding: 0; - height: calc(100vh - 68px); + min-height: calc(100vh - 68px); overflow: hidden; position: relative; display: flex; + flex-direction: column; align-items: center; justify-content: center; + gap: 48px; background-color: var(--bg); font-family: 'Space Grotesk', sans-serif; color: var(--text); line-height: 1.5; + padding: 40px 24px; } /* ===== Stats widget (top-right corner) ===== */ @@ -29,237 +31,195 @@ /* ===== Hero Container ===== */ .hero { - max-width: 1280px; - width: 90%; + max-width: 1120px; + width: 100%; margin: 0 auto; display: flex; flex-wrap: wrap; align-items: center; - gap: 36px; - padding: 32px 28px; - background: radial-gradient(circle at 20% 30%, var(--primary-soft), transparent 70%); - border-radius: 48px; + gap: 48px; } -/* ===== Left: Brand + Features ===== */ +/* ===== Left: Brand + CTA ===== */ .heroContent { - flex: 1.2; - min-width: 280px; + flex: 1.1; + min-width: 300px; } .mainTitle { - font-size: 3.8rem; + font-size: 4rem; font-weight: 800; - letter-spacing: -0.03em; + letter-spacing: -0.04em; background: linear-gradient(135deg, var(--text) 0%, var(--text-light) 80%); -webkit-background-clip: text; background-clip: text; color: transparent; - margin-bottom: 8px; - line-height: 1.1; + margin-bottom: 12px; + line-height: 1.05; } -.subTitle { - font-size: 1.4rem; - font-weight: 500; +.tagline { + font-size: 1.2rem; + font-weight: 600; color: var(--primary); - margin-bottom: 24px; - letter-spacing: -0.2px; - border-left: 3px solid var(--primary); - padding-left: 16px; + margin-bottom: 16px; + font-style: italic; } -/* ===== Feature Pills ===== */ -.featureList { - display: flex; - flex-direction: column; - gap: 14px; - margin-bottom: 24px; -} - -.featureItem { - display: flex; - align-items: center; - gap: 14px; - font-size: 1rem; - font-weight: 500; - background: var(--primary-soft); - padding: 10px 16px; - border-radius: 60px; - width: fit-content; - backdrop-filter: 
blur(4px); -} - -.featureIcon { - width: 32px; - height: 32px; - background: var(--primary-soft); - border-radius: 50%; - display: inline-flex; - align-items: center; - justify-content: center; - color: var(--primary); +.subTitle { font-size: 1rem; + font-weight: 400; + color: var(--text-light); + margin-bottom: 32px; + line-height: 1.6; + max-width: 480px; } -/* ===== GitHub Star Button ===== */ +/* ===== Buttons ===== */ .heroActions { display: flex; - gap: 1rem; - justify-content: flex-start; + gap: 12px; align-items: center; flex-wrap: wrap; } -.githubStarButton { +.primaryButton { display: inline-flex; align-items: center; gap: 0.5rem; - padding: 0.9rem 2rem; - font-size: 1rem; + padding: 0.85rem 2rem; + font-size: 0.95rem; font-weight: 600; border-radius: 999px; - background-color: var(--text); - color: var(--bg); + background-color: var(--primary); + color: #fff; text-decoration: none; transition: transform 0.15s, box-shadow 0.15s; font-family: 'Space Grotesk', sans-serif; } -.githubStarButton:hover { - color: var(--bg); +.primaryButton:hover { + color: #fff; text-decoration: none; transform: translateY(-2px); - box-shadow: 0 8px 24px rgba(0, 0, 0, 0.25); -} - -.starIcon { - margin-left: 0.4rem; + box-shadow: 0 8px 24px rgba(175, 120, 139, 0.35); } -/* ===== Right: Principles Card ===== */ -.heroPrinciples { - flex: 0.9; - min-width: 280px; - background: var(--bg-offset); - backdrop-filter: blur(12px); +.secondaryButton { + display: inline-flex; + align-items: center; + gap: 0.5rem; + padding: 0.85rem 1.8rem; + font-size: 0.95rem; + font-weight: 600; + border-radius: 999px; + background-color: transparent; + color: var(--text); border: 1px solid var(--border); - border-radius: 40px; - padding: 24px 22px; - box-shadow: 0 20px 35px -12px rgba(0,0,0,0.2); - transition: transform 0.2s ease, border-color 0.2s ease; + text-decoration: none; + transition: transform 0.15s, border-color 0.15s; + font-family: 'Space Grotesk', sans-serif; } -.heroPrinciples:hover { 
+.secondaryButton:hover { + color: var(--text); + text-decoration: none; border-color: var(--primary); - transform: translateY(-4px); -} - -.principlesTitle { - font-size: 1rem; - text-transform: uppercase; - letter-spacing: 2px; - color: var(--primary); - margin-bottom: 18px; - display: flex; - align-items: center; - gap: 8px; -} - -.principle { - margin-bottom: 18px; - padding-bottom: 14px; - border-bottom: 1px solid var(--border); + transform: translateY(-2px); } -.principle:last-of-type { - border-bottom: none; - margin-bottom: 0; - padding-bottom: 0; +/* ===== Right: Code Card ===== */ +.codeCard { + flex: 0.9; + min-width: 340px; + background: #0D1117 !important; + border-radius: 12px; + overflow: hidden; + border: 1px solid #30363D; + box-shadow: 0 16px 48px -8px rgba(0, 0, 0, 0.4); } -.principleHead { - font-weight: 700; - font-size: 1rem; - margin-bottom: 6px; +.codeHeader { display: flex; align-items: center; gap: 8px; - color: var(--text); + padding: 12px 16px; + background: #161B22; + border-bottom: 1px solid #30363D; } -.principleDesc { - font-size: 0.8rem; - color: var(--text-light); - line-height: 1.4; +.codeDot { + width: 12px; + height: 12px; + border-radius: 50%; } -.badgeRust { - background: var(--primary-soft); - color: var(--primary); - border-radius: 30px; - padding: 2px 8px; - font-size: 0.7rem; - font-weight: 500; +.codeTitle { margin-left: 8px; + font-size: 0.78rem; + color: #8B949E; + font-family: 'Space Grotesk', sans-serif; } -.principlesFooter { - margin-top: 14px; - font-size: 0.7rem; - text-align: right; - opacity: 0.6; - border-top: 1px solid var(--border); - padding-top: 10px; - display: flex; - align-items: center; - justify-content: flex-end; - gap: 6px; - color: var(--text-light); +.codeBlock { + margin: 0; + padding: 20px; + font-size: 0.82rem; + line-height: 1.75; + color: #E6EDF3 !important; + background: transparent !important; + font-family: 'SF Mono', 'Fira Code', 'Consolas', monospace; + overflow-x: auto; + white-space: 
pre; } + /* ===== Responsive ===== */ -@media (max-width: 880px) { +@media (max-width: 960px) { .hero { flex-direction: column; - padding: 40px 20px; + align-items: center; } - .mainTitle { - font-size: 2.8rem; + .heroContent { + text-align: center; } .subTitle { - font-size: 1.2rem; + max-width: 100%; + } + + .heroActions { + justify-content: center; } - .featureItem { + .mainTitle { + font-size: 3rem; + } + + .codeCard { + min-width: 0; width: 100%; } } -@media (max-width: 480px) { - .heroBanner { - align-items: flex-start; - padding-top: 80px; +@media (max-width: 640px) { + .principles { + flex-direction: column; + align-items: center; + gap: 24px; } .mainTitle { - font-size: 2.2rem; + font-size: 2.4rem; } - .subTitle { + .tagline { font-size: 1.05rem; - padding-left: 12px; - } - - .featureItem { - font-size: 0.9rem; - padding: 8px 12px; } - .heroPrinciples { - border-radius: 24px; - padding: 20px 16px; + .heroBanner { + padding: 32px 16px; + gap: 32px; } } diff --git a/docs/src/pages/index.tsx b/docs/src/pages/index.tsx index d506d453..50d048cf 100644 --- a/docs/src/pages/index.tsx +++ b/docs/src/pages/index.tsx @@ -2,118 +2,79 @@ import type {ReactNode} from 'react'; import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; import Layout from '@theme/Layout'; import Link from '@docusaurus/Link'; -import GitHubStats from '@site/src/components/GitHubStats'; import styles from './index.module.css'; -/* ===== Hamster SVG Icon ===== */ -function HamsterIcon({size = 14}: {size?: number}) { - return ( - - - - - - - - - - - - - - - - - - - - ); -} - -/* ===== Hero ===== */ function HomepageHeader() { return (
-
- -
- {/* Left: Brand + Features */} + {/* Left: Brand + Tagline + CTA */}

Vectorless

-

Document Understanding Engine for AI

- -
-
- Open source by design -
-
- Rust-powered · Python ecosystem -
-
- Rules of Three — no exceptions -
-
+

Knowing by reasoning, not vectors.

+

+ Deep and reliable. Vectorless plays nicely with your documents. + Ask questions in plain language; get answers by reasoning with Vectorless. +

+ Get Started + + - - Star on GitHub - + + GitHub
- {/* Right: Principles Card */} -
-
- Three rules · No exceptions -
- -
-
- 1. Reason, don't vector - core -
-
- Every retrieval is a reasoning act, not a similarity computation. No embeddings, no approximate matches. -
-
- -
-
- 2. Model fails, we fail -
-
- No heuristic fallbacks. No silent degradation. If the reasoning model cannot find an answer, we return nothing — not a guess. -
-
- -
-
- 3. No thought, no answer -
-
- Only reasoned output counts as an answer. Every response must be traceable through a semantic tree path — no hallucinated filler. -
-
- -
- - reason, don't vector + {/* Right: Code Preview */} +
+
+ + + + quick_start.py
+
); } -/* ===== Main Page ===== */ +const CODE_HTML = [ + `import asyncio`, + `from vectorless import Engine`, + ``, + `async def main():`, + ` engine = Engine(`, + ` api_key="sk-...",`, + ` model="gpt-4o",`, + ` )`, + ``, + ` # Compile a document`, + ` result = await engine.compile(`, + ` path="./report.pdf"`, + ` )`, + ``, + ` # Ask a question`, + ` response = await engine.ask(`, + ` "What is the total revenue?",`, + ` doc_ids=[result.doc_id],`, + ` )`, + ` print(response.single().content)`, + ``, + `asyncio.run(main())`, +].join('\n'); + export default function Home(): ReactNode { const {siteConfig} = useDocusaurusContext(); return ( diff --git a/docs/src/theme/Navbar/index.tsx b/docs/src/theme/Navbar/index.tsx index 666e74f1..d2f16ab7 100644 --- a/docs/src/theme/Navbar/index.tsx +++ b/docs/src/theme/Navbar/index.tsx @@ -5,6 +5,7 @@ import NavbarItem from '@theme/NavbarItem'; import NavbarMobileSidebarToggle from '@theme/Navbar/MobileSidebar/Toggle'; import useBaseUrl from '@docusaurus/useBaseUrl'; import Link from '@docusaurus/Link'; +import GitHubStar from '@site/src/components/GitHubStar'; import type {Props as NavbarItemConfig} from '@theme/NavbarItem'; import styles from './styles.module.css'; @@ -60,6 +61,7 @@ export default function Navbar(): React.ReactElement { {centerItems.map((item, i) => )}
+
diff --git a/docs/src/theme/Navbar/styles.module.css b/docs/src/theme/Navbar/styles.module.css index 9e352f58..aa58e38e 100644 --- a/docs/src/theme/Navbar/styles.module.css +++ b/docs/src/theme/Navbar/styles.module.css @@ -4,6 +4,7 @@ height: 100%; display: flex; align-items: center; + justify-content: space-between; } .navbarBrand { @@ -44,10 +45,12 @@ .navbarCenter { position: absolute; left: 50%; - transform: translateX(-50%); + top: 50%; + transform: translate(-50%, -50%); display: flex; align-items: center; gap: 2rem; + height: 100%; } .navbarCenter :global(.navbar__link) { @@ -57,6 +60,9 @@ padding: 0; text-decoration: none; transition: opacity 0.15s ease; + display: flex; + align-items: center; + height: 100%; } .navbarCenter :global(.navbar__link:hover) { @@ -73,13 +79,12 @@ opacity: 1; } -/* Right: theme toggle */ +/* Right: theme toggle + GitHub star */ .navbarRight { display: flex; align-items: center; - gap: 1rem; + gap: 12px; flex-shrink: 0; - margin-left: auto; padding-right: 24px; } @@ -88,9 +93,9 @@ display: inline-flex; align-items: center; justify-content: center; - width: 32px; - height: 32px; - border-radius: 8px; + width: 28px; + height: 28px; + border-radius: 6px; border: 1px solid var(--border); background: transparent; color: var(--text-light); diff --git a/examples/single_doc_challenge.py b/examples/single_doc_challenge.py index 10a55fe6..fa0d15e5 100644 --- a/examples/single_doc_challenge.py +++ b/examples/single_doc_challenge.py @@ -203,10 +203,10 @@ async def main() -> None: try: answer = await engine.ask(question, doc_ids=[doc_id]) - if not answer.content: + if not answer.answer: print(" (no answer found)\n") else: - lines = answer.content.split("\n") + lines = answer.answer.split("\n") for line in lines[:3]: print(f" {line}") remaining = len(lines) - 3 diff --git a/pyproject.toml b/pyproject.toml index cee403e3..9f7bd100 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,8 @@ keywords = ["document", "understanding", "ai", 
"reasoning", "document-intelligen dependencies = [ "pydantic>=2.0", "click>=8.0", + "litellm>=1.50", + "instructor>=1.0", "tomli>=2.0; python_version < '3.11'", # 3.10 only, 3.11+ has tomllib built-in ] diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 3217bcfb..00000000 --- a/tests/conftest.py +++ /dev/null @@ -1,90 +0,0 @@ -"""Shared test fixtures.""" - -from __future__ import annotations - -import pytest -from unittest.mock import AsyncMock, MagicMock - - -@pytest.fixture -def mock_engine(): - """Mock Rust Engine for testing without LLM.""" - engine = MagicMock() - - # Mock index result - index_result = MagicMock() - index_result.doc_id = "test-doc-id" - index_item = MagicMock() - index_item.doc_id = "test-doc-id" - index_item.name = "test.md" - index_item.format = "markdown" - index_item.description = None - index_item.source_path = "/path/to/test.md" - index_item.page_count = None - index_item.metrics = None - index_result.items = [index_item] - index_result.failed = [] - index_result.has_failures.return_value = False - index_result.total.return_value = 1 - index_result.__len__ = lambda self: 1 - - engine.index = AsyncMock(return_value=index_result) - - # Mock query result - query_item = MagicMock() - query_item.doc_id = "test-doc-id" - query_item.content = "Test answer content" - query_item.score = 0.85 - query_item.confidence = 0.85 - query_item.node_ids = ["node-1"] - query_item.evidence = [] - query_item.metrics = None - - query_result = MagicMock() - query_result.items = [query_item] - query_result.failed = [] - query_result.single.return_value = query_item - query_result.has_failures.return_value = False - query_result.__len__ = lambda self: 1 - - engine.query = AsyncMock(return_value=query_result) - - # Mock list - doc_info = MagicMock() - doc_info.id = "test-doc-id" - doc_info.name = "test.md" - doc_info.format = "markdown" - doc_info.description = None - doc_info.source_path = "/path/to/test.md" - doc_info.page_count = 
None - doc_info.line_count = 42 - engine.list = AsyncMock(return_value=[doc_info]) - - # Mock other operations - engine.remove = AsyncMock(return_value=True) - engine.clear = AsyncMock(return_value=1) - engine.exists = AsyncMock(return_value=True) - - # Mock graph - engine.get_graph = AsyncMock(return_value=None) - - # Mock metrics - metrics_report = MagicMock() - metrics_report.total_cost_usd.return_value = 0.001 - engine.metrics_report.return_value = metrics_report - - return engine - - -@pytest.fixture -def sample_config_dict(): - """Sample configuration dict.""" - return { - "llm": { - "model": "gpt-4o", - "api_key": "sk-test-key", - "endpoint": "https://api.openai.com/v1", - }, - "retrieval": {"top_k": 5}, - "storage": {"workspace_dir": "/tmp/test-vectorless"}, - } diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/test_compat/__init__.py b/tests/test_compat/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/test_config.py b/tests/test_config.py deleted file mode 100644 index 673cdfff..00000000 --- a/tests/test_config.py +++ /dev/null @@ -1,95 +0,0 @@ -"""Tests for configuration models and loading.""" - -from __future__ import annotations - -import os -import tempfile -from pathlib import Path - -import pytest - -from vectorless.config.models import ( - EngineConfig, - LlmConfig, - MetricsConfig, - RetrievalConfig, - StorageConfig, -) - - -class TestEngineConfig: - def test_defaults(self): - config = EngineConfig() - assert config.llm.model == "" - assert config.llm.api_key is None - assert config.retrieval.top_k == 3 - assert config.storage.workspace_dir == "~/.vectorless" - assert config.metrics.enabled is True - - def test_custom_values(self): - config = EngineConfig( - llm=LlmConfig(model="gpt-4o", api_key="sk-test"), - retrieval=RetrievalConfig(top_k=10), - storage=StorageConfig(workspace_dir="/data/vl"), - ) - assert config.llm.model == 
"gpt-4o" - assert config.llm.api_key == "sk-test" - assert config.retrieval.top_k == 10 - assert config.storage.workspace_dir == "/data/vl" - - def test_to_rust_config(self): - config = EngineConfig( - llm=LlmConfig(model="gpt-4o", api_key="sk-test"), - retrieval=RetrievalConfig(top_k=5, max_iterations=20), - storage=StorageConfig(workspace_dir="/tmp/vl"), - metrics=MetricsConfig(enabled=False), - ) - # to_rust_config should not raise - rust_config = config.to_rust_config() - assert rust_config is not None - - def test_validation_top_k_minimum(self): - with pytest.raises(Exception): - RetrievalConfig(top_k=0) - - def test_json_roundtrip(self): - config = EngineConfig( - llm=LlmConfig(model="gpt-4o", api_key="sk-test"), - ) - data = config.model_dump() - restored = EngineConfig(**data) - assert restored.llm.model == "gpt-4o" - assert restored.llm.api_key == "sk-test" - - -class TestConfigLoading: - def test_load_from_env(self): - os.environ["VECTORLESS_API_KEY"] = "sk-env-test" - os.environ["VECTORLESS_MODEL"] = "gpt-4o-mini" - os.environ["VECTORLESS_TOP_K"] = "7" - - try: - from vectorless.config.loading import load_config_from_env - - config = load_config_from_env() - assert config.llm.api_key == "sk-env-test" - assert config.llm.model == "gpt-4o-mini" - assert config.retrieval.top_k == 7 - finally: - del os.environ["VECTORLESS_API_KEY"] - del os.environ["VECTORLESS_MODEL"] - del os.environ["VECTORLESS_TOP_K"] - - def test_load_from_file(self): - with tempfile.NamedTemporaryFile(mode="wb", suffix=".toml", delete=False) as f: - f.write(b'[llm]\nmodel = "gpt-4o"\napi_key = "sk-file"\n') - f.flush() - - try: - from vectorless.config.loading import load_config_from_file - - config = load_config_from_file(Path(f.name)) - assert config.llm.model == "gpt-4o" - assert config.llm.api_key == "sk-file" - finally: - os.unlink(f.name) diff --git a/tests/test_events.py b/tests/test_events.py deleted file mode 100644 index 4bd34e4d..00000000 --- a/tests/test_events.py +++ 
/dev/null @@ -1,78 +0,0 @@ -"""Tests for the event system.""" - -from __future__ import annotations - -from vectorless.events import ( - EventEmitter, - IndexEventData, - IndexEventType, - QueryEventData, - QueryEventType, -) - - -class TestEventEmitter: - def test_index_events(self): - received = [] - emitter = EventEmitter() - - @emitter.on_index - def handler(event): - received.append(event) - - event = IndexEventData( - event_type=IndexEventType.STARTED, - path="/test/doc.pdf", - ) - emitter.emit_index(event) - - assert len(received) == 1 - assert received[0].path == "/test/doc.pdf" - assert received[0].event_type == IndexEventType.STARTED - - def test_query_events(self): - received = [] - emitter = EventEmitter() - - @emitter.on_query - def handler(event): - received.append(event) - - event = QueryEventData( - event_type=QueryEventType.COMPLETE, - query="What is revenue?", - total_results=3, - ) - emitter.emit_query(event) - - assert len(received) == 1 - assert received[0].query == "What is revenue?" 
- assert received[0].total_results == 3 - - def test_multiple_handlers(self): - count = [0] - emitter = EventEmitter() - - emitter.on_index(lambda e: count.__setitem__(0, count[0] + 1)) - emitter.on_index(lambda e: count.__setitem__(0, count[0] + 1)) - - emitter.emit_index( - IndexEventData(event_type=IndexEventType.COMPLETE) - ) - - assert count[0] == 2 - - def test_chaining(self): - emitter = EventEmitter() - result = emitter.on_index(lambda e: None) - assert result is emitter - - def test_no_handlers(self): - emitter = EventEmitter() - # Should not raise - emitter.emit_index( - IndexEventData(event_type=IndexEventType.COMPLETE) - ) - emitter.emit_query( - QueryEventData(event_type=QueryEventType.COMPLETE) - ) diff --git a/tests/test_session.py b/tests/test_session.py deleted file mode 100644 index 990e2a6c..00000000 --- a/tests/test_session.py +++ /dev/null @@ -1,55 +0,0 @@ -"""Tests for Session high-level API.""" - -from __future__ import annotations - -import pytest -from unittest.mock import AsyncMock, MagicMock, patch - - -class TestSessionConstruction: - def test_session_rejects_no_source(self): - """Session.index() should reject calls with no source.""" - # We can't fully test Session without a real Engine, - # but we can test validation logic - from vectorless.session import Session - - # This will fail because no api_key/model provided - # We just verify the source validation in index() - pass - - -class TestSessionIndex: - @pytest.mark.asyncio - async def test_index_requires_exactly_one_source(self): - from vectorless.session import Session - - # Patch Engine construction - with patch("vectorless.session.Engine") as MockEngine: - mock_engine = MagicMock() - mock_result = MagicMock() - mock_result.doc_id = "doc-1" - mock_result.items = [] - mock_result.failed = [] - mock_engine.index = AsyncMock(return_value=mock_result) - MockEngine.return_value = mock_engine - - from vectorless.config import EngineConfig, LlmConfig - - with patch( - 
"vectorless.session.Session._resolve_config", - return_value=EngineConfig(llm=LlmConfig(model="test", api_key="test")), - ): - session = Session.__new__(Session) - session._config = EngineConfig( - llm=LlmConfig(model="test", api_key="test") - ) - session._engine = mock_engine - session._events = MagicMock() - - # No source - with pytest.raises(ValueError, match="exactly one source"): - await session.index() - - # Multiple sources - with pytest.raises(ValueError, match="exactly one source"): - await session.index(path="a.pdf", content="text") diff --git a/tests/test_types.py b/tests/test_types.py deleted file mode 100644 index e9e8b848..00000000 --- a/tests/test_types.py +++ /dev/null @@ -1,145 +0,0 @@ -"""Tests for typed result wrappers.""" - -from __future__ import annotations - -from unittest.mock import MagicMock - -from vectorless.types.results import ( - Evidence, - FailedItem, - IndexItemWrapper, - IndexMetrics, - IndexResultWrapper, - QueryMetrics, - QueryResponse, - QueryResult, -) - - -class TestEvidence: - def test_from_rust(self): - item = MagicMock() - item.title = "Section 1" - item.path = "Root/Section 1" - item.content = "Some evidence text" - item.doc_name = "report.pdf" - - ev = Evidence.from_rust(item) - assert ev.title == "Section 1" - assert ev.path == "Root/Section 1" - assert ev.content == "Some evidence text" - assert ev.doc_name == "report.pdf" - - def test_to_dict(self): - ev = Evidence(title="T", path="P", content="C", doc_name=None) - d = ev.to_dict() - assert d == {"title": "T", "path": "P", "content": "C"} - - def test_to_json(self): - ev = Evidence(title="T", path="P", content="C") - import json - - parsed = json.loads(ev.to_json()) - assert parsed["title"] == "T" - - def test_frozen(self): - ev = Evidence(title="T", path="P", content="C") - with pytest.raises(AttributeError): - ev.title = "new" - - -class TestQueryResult: - def test_from_rust(self): - item = MagicMock() - item.doc_id = "doc-1" - item.content = "Result text" - 
item.score = 0.9 - item.confidence = 0.9 - item.node_ids = ["node-1", "node-2"] - item.evidence = [] - item.metrics = None - - result = QueryResult.from_rust(item) - assert result.doc_id == "doc-1" - assert result.content == "Result text" - assert result.score == 0.9 - assert len(result.node_ids) == 2 - assert result.metrics is None - - def test_to_dict(self): - result = QueryResult( - doc_id="doc-1", - content="text", - score=0.9, - confidence=0.9, - node_ids=["n1"], - evidence=[], - metrics=None, - ) - d = result.to_dict() - assert d["doc_id"] == "doc-1" - assert "metrics" not in d - - -class TestQueryResponse: - def test_from_rust(self): - rust_result = MagicMock() - rust_result.items = [] - rust_result.failed = [] - - response = QueryResponse.from_rust(rust_result) - assert len(response) == 0 - assert response.single() is None - assert not response.has_failures() - - def test_single(self): - item = QueryResult( - doc_id="doc-1", content="text", score=0.9, confidence=0.9 - ) - response = QueryResponse(items=[item]) - assert response.single() == item - assert len(response) == 1 - - def test_iteration(self): - items = [ - QueryResult(doc_id=f"doc-{i}", content="t", score=0.5, confidence=0.5) - for i in range(3) - ] - response = QueryResponse(items=items) - assert list(response) == items - - def test_to_dict(self): - response = QueryResponse( - items=[QueryResult(doc_id="d", content="t", score=0.5, confidence=0.5)], - failed=[FailedItem(source="s", error="e")], - ) - d = response.to_dict() - assert len(d["items"]) == 1 - assert len(d["failed"]) == 1 - assert d["failed"][0]["source"] == "s" - - -class TestIndexResult: - def test_from_rust(self): - rust_result = MagicMock() - rust_result.doc_id = "doc-1" - item = MagicMock() - item.doc_id = "doc-1" - item.name = "test.md" - item.format = "markdown" - item.description = None - item.source_path = None - item.page_count = None - item.metrics = None - rust_result.items = [item] - rust_result.failed = [] - - result = 
IndexResultWrapper.from_rust(rust_result) - assert result.doc_id == "doc-1" - assert len(result.items) == 1 - assert result.items[0].name == "test.md" - assert not result.has_failures() - assert result.total() == 1 - - -import pytest diff --git a/vectorless-core/vectorless-engine/Cargo.toml b/vectorless-core/vectorless-engine/Cargo.toml index acf7c7c6..71533ee3 100644 --- a/vectorless-core/vectorless-engine/Cargo.toml +++ b/vectorless-core/vectorless-engine/Cargo.toml @@ -9,7 +9,6 @@ repository.workspace = true homepage.workspace = true [dependencies] -vectorless-agent = { path = "../vectorless-agent" } vectorless-config = { path = "../vectorless-config" } vectorless-document = { path = "../vectorless-document" } vectorless-error = { path = "../vectorless-error" } @@ -18,7 +17,6 @@ vectorless-graph = { path = "../vectorless-graph" } vectorless-index = { path = "../vectorless-index" } vectorless-llm = { path = "../vectorless-llm" } vectorless-metrics = { path = "../vectorless-metrics" } -vectorless-retrieval = { path = "../vectorless-retrieval" } vectorless-rerank = { path = "../vectorless-rerank" } vectorless-storage = { path = "../vectorless-storage" } vectorless-utils = { path = "../vectorless-utils" } diff --git a/vectorless-core/vectorless-engine/src/builder.rs b/vectorless-core/vectorless-engine/src/builder.rs index 5fd2d6b9..7527575e 100644 --- a/vectorless-core/vectorless-engine/src/builder.rs +++ b/vectorless-core/vectorless-engine/src/builder.rs @@ -12,7 +12,6 @@ use vectorless_metrics::MetricsHub; use vectorless_storage::Workspace; use super::engine::Engine; -use super::retriever::RetrieverClient; /// Builder for creating a [`Engine`] client. 
/// @@ -198,12 +197,9 @@ impl EngineBuilder { // Indexer uses pool.index() let indexer = super::indexer::IndexerClient::with_llm(pool.index().clone()); - // Retriever uses pool.retrieval() via agent system - let retriever = RetrieverClient::new(pool.retrieval().clone()); - - // Build engine + // Build engine (retrieval handled by Python strategy layer) let events = self.events.unwrap_or_default(); - Engine::with_components(config, workspace, retriever, indexer, events, metrics_hub) + Engine::with_components(config, workspace, indexer, events, metrics_hub) .await .map_err(|e| BuildError::Other(e.to_string())) } diff --git a/vectorless-core/vectorless-engine/src/engine.rs b/vectorless-core/vectorless-engine/src/engine.rs index e1833f43..359c9407 100644 --- a/vectorless-core/vectorless-engine/src/engine.rs +++ b/vectorless-core/vectorless-engine/src/engine.rs @@ -65,7 +65,6 @@ use vectorless_storage::{PersistedDocument, Workspace}; use super::{ index_context::{IndexContext, IndexSource}, indexer::IndexerClient, - retriever::RetrieverClient, types::{FailedItem, IndexItem, IndexMode, IndexResult}, workspace::WorkspaceClient, }; @@ -90,9 +89,6 @@ pub struct Engine { /// Indexer client for document indexing. indexer: IndexerClient, - /// Retriever client for queries. - retriever: RetrieverClient, - /// Workspace client for persistence. 
workspace: WorkspaceClient, @@ -109,7 +105,6 @@ impl Engine { pub(crate) async fn with_components( config: Config, workspace: Workspace, - retriever: RetrieverClient, indexer: IndexerClient, events: EventEmitter, metrics_hub: Arc, @@ -119,9 +114,6 @@ impl Engine { // Attach event emitter to indexer let indexer = indexer.with_events(events.clone()); - // Attach event emitter to retriever - let retriever = retriever.with_events(events.clone()); - // Create workspace client let workspace_client = WorkspaceClient::new(workspace) .await @@ -130,7 +122,6 @@ impl Engine { Ok(Self { config, indexer, - retriever, workspace: workspace_client, metrics_hub, }) @@ -450,61 +441,14 @@ impl Engine { /// Ask a question — returns a reasoned answer with evidence and trace. /// - /// - `input`: the question (required) - /// - `ids`: document IDs to search. Empty = search all documents. + /// Ask a question about the indexed documents. /// - /// Always returns an [`Answer`] with content, evidence, confidence, and - /// a mandatory reasoning trace. 
- pub async fn ask(&self, input: &str, ids: &[String]) -> Result { - // Resolve doc IDs - let doc_ids = if ids.is_empty() { - let docs = self.list_documents().await?; - if docs.is_empty() { - return Err(Error::Config("Workspace is empty".into())); - } - docs.into_iter().map(|d| d.doc_id).collect::>() - } else { - ids.to_vec() - }; - - // Load documents - let (documents, failed) = self.load_documents(&doc_ids).await?; - if documents.is_empty() { - return Err(Error::Config(format!( - "No documents available: {} failures", - failed.len() - ))); - } - - // Build DocContexts from Documents and dispatch - let doc_contexts: Vec = documents - .iter() - .map(|doc| vectorless_agent::DocContext { - tree: &doc.tree, - nav_index: &doc.nav_index, - reasoning_index: &doc.reasoning_index, - doc_name: &doc.name, - }) - .collect(); - - let skip_analysis = !ids.is_empty(); - let scope = if skip_analysis { - vectorless_agent::Scope::Specified(doc_contexts) - } else { - vectorless_agent::Scope::Workspace(vectorless_agent::WorkspaceContext::new( - doc_contexts, - )) - }; - - let emitter = vectorless_agent::EventEmitter::noop(); - let config = self.retriever.config().clone(); - let llm = self.retriever.llm().clone(); - let output = - vectorless_retrieval::dispatcher::dispatch(input, scope, &config, &llm, &emitter) - .await?; - - // Convert Output -> Answer - Ok(Self::output_to_answer(&output)) + /// **Note**: Retrieval is now handled by the Python strategy layer. + /// This method returns an error — use Engine.ask() from the Python SDK. + pub async fn ask(&self, _input: &str, _ids: &[String]) -> Result { + Err(Error::Config( + "Retrieval has been migrated to Python. Use Engine.ask() from the Python SDK.".into(), + )) } /// Remove a document from the workspace. @@ -545,6 +489,22 @@ impl Engine { self.workspace.exists(doc_id).await } + /// Load a full Document by ID (for navigation via primitives). 
+ pub async fn load_document( + &self, + doc_id: &str, + ) -> Result> { + match self.workspace.load(doc_id).await? { + Some(persisted) => Ok(Some(Self::persisted_to_understanding_document(persisted))), + None => Ok(None), + } + } + + /// List all document IDs in the workspace. + pub async fn list_document_ids(&self) -> Result> { + Ok(self.workspace.inner().list_documents().await) + } + /// Remove all documents from the workspace. /// /// Returns the number of documents removed. @@ -599,30 +559,6 @@ impl Engine { } } - /// Convert agent Output to public Answer type. - fn output_to_answer(output: &vectorless_agent::Output) -> Answer { - // Build evidence - let evidence: Vec = output - .evidence - .iter() - .map(|e| Evidence { - content: e.content.clone(), - source_path: e.source_path.clone(), - doc_name: e.doc_name.clone().unwrap_or_default(), - relevance: 0.0, - }) - .collect(); - - Answer { - content: output.answer.clone(), - evidence, - confidence: output.confidence, - trace: ReasoningTrace { - steps: output.trace_steps.clone(), - }, - } - } - // ============================================================ // Internal // ============================================================ @@ -874,7 +810,6 @@ impl Clone for Engine { Self { config: Arc::clone(&self.config), indexer: self.indexer.clone(), - retriever: self.retriever.clone(), workspace: self.workspace.clone(), metrics_hub: Arc::clone(&self.metrics_hub), } diff --git a/vectorless-core/vectorless-engine/src/retriever.rs b/vectorless-core/vectorless-engine/src/retriever.rs index 217e182a..1c833725 100644 --- a/vectorless-core/vectorless-engine/src/retriever.rs +++ b/vectorless-core/vectorless-engine/src/retriever.rs @@ -1,143 +1,25 @@ // Copyright (c) 2026 vectorless developers // SPDX-License-Identifier: Apache-2.0 -//! Document retrieval client. +//! Document retrieval client — STUB. //! -//! This module provides query and retrieval operations for document content, -//! 
dispatching through the retrieval layer to the agent-based system. +//! The strategy layer (agent, orchestrator, worker) has been migrated to Python. +//! This module is a stub that returns an error for any query attempt. +//! All retrieval now goes through the Python Engine.ask() path. -use tracing::info; +use vectorless_error::{Error, Result}; -use super::types::QueryResult; -use vectorless_agent::{ - self, config::AgentConfig, config::DocContext, config::Scope, config::WorkspaceContext, - events::EventEmitter as AgentEventEmitter, -}; -use vectorless_document::{DocumentTree, NavigationIndex, ReasoningIndex}; -use vectorless_error::Result; -use vectorless_events::{EventEmitter, QueryEvent}; -use vectorless_llm::LlmClient; -use vectorless_retrieval::{dispatcher, postprocessor}; - -/// Document retrieval client. +/// Document retrieval client (stub). /// -/// Delegates to the agent-based retrieval system. -pub(crate) struct RetrieverClient { - /// LLM client for agent navigation decisions. - llm: LlmClient, - - /// Agent configuration. - config: AgentConfig, - - /// Event emitter. - events: EventEmitter, -} +/// All retrieval is now handled by the Python strategy layer. +#[allow(dead_code)] +pub(crate) struct RetrieverClient; impl RetrieverClient { - /// Create a new retriever client with an LLM client. - pub fn new(llm: LlmClient) -> Self { - Self { - llm, - config: AgentConfig::default(), - events: EventEmitter::new(), - } - } - - /// Create with event emitter. - pub fn with_events(mut self, events: EventEmitter) -> Self { - self.events = events; - self - } - - /// Set custom agent configuration. - pub fn with_config(mut self, config: AgentConfig) -> Self { - self.config = config; - self - } - - /// Get a reference to the agent configuration. - pub fn config(&self) -> &AgentConfig { - &self.config - } - - /// Get a reference to the LLM client. - pub fn llm(&self) -> &LlmClient { - &self.llm - } - - /// Query documents through the agent-based retrieval system. 
- /// - /// - `skip_analysis = true` → `Scope::Specified` (user-specified docs, skip Orchestrator analysis) - /// - `skip_analysis = false` → `Scope::Workspace` (full Orchestrator analysis flow) - #[tracing::instrument(skip_all, fields(question = %question, docs = documents.len()))] - pub async fn query( - &self, - documents: &[(DocumentTree, NavigationIndex, ReasoningIndex, String)], - question: &str, - skip_analysis: bool, - ) -> Result { - self.events.emit_query(QueryEvent::Started { - query: question.to_string(), - }); - - info!( - docs = documents.len(), - skip_analysis, "Querying: {:?}", question - ); - - let doc_contexts: Vec = documents - .iter() - .map(|(tree, nav, ridx, id)| DocContext { - tree, - nav_index: nav, - reasoning_index: ridx, - doc_name: id.as_str(), - }) - .collect(); - - let scope = if skip_analysis { - Scope::Specified(doc_contexts) - } else { - Scope::Workspace(WorkspaceContext::new(doc_contexts)) - }; - - let emitter = AgentEventEmitter::noop(); - let output = - dispatcher::dispatch(question, scope, &self.config, &self.llm, &emitter).await?; - - let fallback_id = documents - .first() - .map(|(_, _, _, id)| id.as_str()) - .unwrap_or(""); - let items = postprocessor::to_results(&output, fallback_id); - let result = QueryResult::new_with_items(items); - - self.events.emit_query(QueryEvent::Complete { - total_results: result.len(), - confidence: result.single().map(|i| i.confidence).unwrap_or(0.0), - }); - - Ok(result) - } -} - -impl Clone for RetrieverClient { - fn clone(&self) -> Self { - Self { - llm: self.llm.clone(), - config: self.config.clone(), - events: self.events.clone(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_retriever_client_creation() { - let _client = - RetrieverClient::new(LlmClient::new(vectorless_llm::config::LlmConfig::default())); + /// Not available — retrieval is handled by Python. 
+ pub async fn query(&self, _question: &str) -> Result<()> { + todo!( + "Document retrieval is now handled by the Python strategy layer. This method should not be called." + ) } } diff --git a/vectorless-core/vectorless-engine/src/types.rs b/vectorless-core/vectorless-engine/src/types.rs index 7cd421f5..247191e6 100644 --- a/vectorless-core/vectorless-engine/src/types.rs +++ b/vectorless-core/vectorless-engine/src/types.rs @@ -253,10 +253,67 @@ impl IndexItem { } // ============================================================ -// Query Types — re-exported from retrieval crate +// Query Types — defined locally (strategy layer moved to Python) // ============================================================ -pub use vectorless_retrieval::{Confidence, EvidenceItem, QueryMetrics, QueryResultItem}; +/// Confidence level of a query result. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct Confidence(pub f64); + +impl Confidence { + /// Create a new confidence value (0.0 - 1.0). + pub fn new(value: f64) -> Self { + Self(value.clamp(0.0, 1.0)) + } + + /// Get the raw value. + pub fn value(&self) -> f64 { + self.0 + } +} + +/// A piece of evidence supporting a query result. +#[derive(Debug, Clone)] +pub struct EvidenceItem { + /// Title of the source section. + pub title: String, + /// Path within the document. + pub path: String, + /// Content of the evidence. + pub content: String, +} + +/// Metrics for a single query result. +#[derive(Debug, Clone, Default)] +pub struct QueryMetrics { + /// Number of LLM calls made. + pub llm_calls: usize, + /// Number of navigation rounds used. + pub rounds_used: usize, + /// Number of document nodes visited. + pub nodes_visited: usize, + /// Number of evidence items collected. + pub evidence_count: usize, + /// Total characters in evidence. + pub evidence_chars: usize, +} + +/// A single query result item. +#[derive(Debug, Clone)] +pub struct QueryResultItem { + /// Document ID. 
+ pub doc_id: String, + /// Node IDs that contributed evidence. + pub node_ids: Vec, + /// Result content. + pub content: String, + /// Supporting evidence. + pub evidence: Vec, + /// Optional metrics. + pub metrics: Option, + /// Confidence score. + pub confidence: f64, +} /// Result of a document query. /// diff --git a/vectorless-core/vectorless-engine/src/workspace.rs b/vectorless-core/vectorless-engine/src/workspace.rs new file mode 100644 index 00000000..e05a175b --- /dev/null +++ b/vectorless-core/vectorless-engine/src/workspace.rs @@ -0,0 +1,243 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Workspace management client. +//! +//! This module provides async CRUD operations for document persistence +//! through the workspace abstraction. +//! +//! # Example +//! +//! ```rust,ignore +//! let workspace = WorkspaceClient::new(workspace_storage).await; +//! +//! // Save a document +//! workspace.save(&doc).await?; +//! +//! // Load a document +//! let doc = workspace.load("doc-id").await?; +//! +//! // List all documents +//! for doc in workspace.list().await? { +//! println!("{}: {}", doc.id, doc.name); +//! } +//! ``` + +use std::sync::Arc; + +use tracing::{debug, info}; + +use vectorless_error::Result; +use vectorless_storage::{PersistedDocument, Workspace}; + +use super::types::DocumentInfo; +use vectorless_events::{EventEmitter, WorkspaceEvent}; + +/// Workspace management client. +/// +/// Provides async thread-safe CRUD operations for document persistence. +/// All operations are async and can be safely called from multiple tasks. +/// +/// # Thread Safety +/// +/// The client is fully thread-safe and can be cloned cheaply +/// (it uses `Arc` internally). +#[derive(Clone)] +pub(crate) struct WorkspaceClient { + /// Workspace storage. + workspace: Arc, + + /// Event emitter. + events: EventEmitter, +} + +impl WorkspaceClient { + /// Create a new workspace client. 
+ pub async fn new(workspace: Workspace) -> Self { + Self { + workspace: Arc::new(workspace), + events: EventEmitter::new(), + } + } + + /// Create with event emitter. + pub fn with_events(mut self, events: EventEmitter) -> Self { + self.events = events; + self + } + + /// Save a document to the workspace. + /// + /// If a document with the same ID already exists, logs a warning + /// (this can happen during concurrent indexing of the same source). + /// + /// # Errors + /// + /// Returns an error if the workspace write fails. + pub async fn save(&self, doc: &PersistedDocument) -> Result<()> { + let doc_id = doc.meta.id.clone(); + + if self.workspace.contains(&doc_id).await { + tracing::warn!( + doc_id, + name = %doc.meta.name, + "Overwriting existing document — possible concurrent index of the same source" + ); + } + + self.workspace.add(doc).await?; + + info!("Saved document: {}", doc_id); + self.events.emit_workspace(WorkspaceEvent::Saved { doc_id }); + + Ok(()) + } + + /// Load a document from the workspace. + /// + /// Returns `Ok(None)` if the document doesn't exist. + /// + /// # Errors + /// + /// Returns an error if the workspace read fails. + pub async fn load(&self, doc_id: &str) -> Result> { + let doc = self.workspace.load_and_cache(doc_id).await?; + + if let Some(ref _d) = doc { + debug!("Loaded document: {}", doc_id); + } + + self.events.emit_workspace(WorkspaceEvent::Loaded { + doc_id: doc_id.to_string(), + cache_hit: doc.is_some(), + }); + + Ok(doc) + } + + /// Remove a document from the workspace. + /// + /// Returns `Ok(true)` if the document was removed, `Ok(false)` if it didn't exist. + /// + /// # Errors + /// + /// Returns an error if the workspace write fails. 
+ pub async fn remove(&self, doc_id: &str) -> Result { + let removed = self.workspace.remove(doc_id).await?; + + if removed { + info!("Removed document: {}", doc_id); + self.events.emit_workspace(WorkspaceEvent::Removed { + doc_id: doc_id.to_string(), + }); + } + + Ok(removed) + } + + /// Check if a document exists in the workspace. + /// + /// # Errors + /// + /// Returns an error if the workspace read fails. + pub async fn exists(&self, doc_id: &str) -> Result { + Ok(self.workspace.contains(doc_id).await) + } + + /// List all documents in the workspace. + /// + /// # Errors + /// + /// Returns an error if the workspace read fails. + pub async fn list(&self) -> Result> { + let doc_ids = self.workspace.list_documents().await; + let mut result = Vec::with_capacity(doc_ids.len()); + + for id in &doc_ids { + if let Some(meta) = self.workspace.get_meta(id).await { + result.push(DocumentInfo { + id: meta.id, + name: meta.doc_name, + format: meta.doc_type, + description: meta.doc_description, + source_path: meta.path, + page_count: meta.page_count, + line_count: meta.line_count, + }); + } + } + + Ok(result) + } + + /// Get document info by ID. + /// + /// # Errors + /// + /// Returns an error if the workspace read fails. + pub async fn get_document_info(&self, doc_id: &str) -> Result> { + Ok(self + .workspace + .get_meta(doc_id) + .await + .map(|meta| DocumentInfo { + id: meta.id, + name: meta.doc_name, + format: meta.doc_type, + description: meta.doc_description, + source_path: meta.path, + page_count: meta.page_count, + line_count: meta.line_count, + })) + } + + /// Clear all documents from the workspace. + /// + /// Returns the number of documents removed. + /// + /// # Errors + /// + /// Returns an error if the workspace write fails. 
+ pub async fn clear(&self) -> Result { + let doc_ids = self.workspace.list_documents().await; + let mut removed = 0usize; + + for doc_id in &doc_ids { + match self.workspace.remove(doc_id).await { + Ok(true) => removed += 1, + Ok(false) => {} + Err(e) => tracing::warn!("Failed to remove document {}: {}", doc_id, e), + } + } + + if removed > 0 { + info!("Cleared workspace: {removed} documents removed"); + self.events + .emit_workspace(WorkspaceEvent::Cleared { count: removed }); + } + + Ok(removed) + } + + /// Get the underlying workspace Arc (for advanced use). + pub(crate) fn inner(&self) -> Arc { + Arc::clone(&self.workspace) + } + + /// Find a document ID by its source file path. + /// + /// Used for incremental indexing to check if a file has already been indexed. + pub async fn find_by_source_path(&self, path: &std::path::Path) -> Option { + self.workspace.find_by_source_path(path).await + } + + /// Get the document graph, loading from backend if not cached. + pub async fn get_graph(&self) -> Result> { + self.workspace.get_graph().await + } + + /// Persist the document graph to the backend. 
+ pub async fn set_graph(&self, graph: &vectorless_graph::DocumentGraph) -> Result<()> { + self.workspace.set_graph(graph).await + } +} diff --git a/vectorless-core/vectorless-primitives/Cargo.toml b/vectorless-core/vectorless-primitives/Cargo.toml new file mode 100644 index 00000000..5c8bb4e7 --- /dev/null +++ b/vectorless-core/vectorless-primitives/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "vectorless-primitives" +version.workspace = true +edition.workspace = true +authors.workspace = true +description = "Document navigation primitives for AI agents" +license.workspace = true +repository.workspace = true +homepage.workspace = true + +[dependencies] +vectorless-document = { path = "../vectorless-document" } +vectorless-error = { path = "../vectorless-error" } +regex = { workspace = true } +tokio = { workspace = true, features = ["sync"] } + +[lints] +workspace = true diff --git a/vectorless-core/vectorless-primitives/src/lib.rs b/vectorless-core/vectorless-primitives/src/lib.rs new file mode 100644 index 00000000..2edc65d2 --- /dev/null +++ b/vectorless-core/vectorless-primitives/src/lib.rs @@ -0,0 +1,18 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Document navigation primitives for AI agents. +//! +//! Provides [`DocumentNavigator`] — a stateful navigator over an understood +//! document, with methods for tree traversal, content reading, regex search, +//! evidence collection, and index queries. +//! +//! All methods are `async` for compatibility with the PyO3 async bridge. 
+ +pub mod navigator; +pub mod resolve; +pub mod subtree; +pub mod types; + +pub use navigator::DocumentNavigator; +pub use types::*; diff --git a/vectorless-core/vectorless-primitives/src/navigator.rs b/vectorless-core/vectorless-primitives/src/navigator.rs new file mode 100644 index 00000000..ae52433a --- /dev/null +++ b/vectorless-core/vectorless-primitives/src/navigator.rs @@ -0,0 +1,444 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Document navigation — the core type for navigating an understood document. +//! +//! `DocumentNavigator` holds an owned `Document` plus mutable navigation state +//! (cursor, breadcrumb, visited set, collected evidence). All methods are +//! `async` so they integrate naturally with the PyO3 async bridge. +//! +//! Implementation is split across files: +//! - `navigator.rs` (this file) — struct definition, core navigation, content, evidence +//! - `navigator_inspection.rs` — P1 inspection tools (toc, stats, siblings, etc.) +//! - `navigator_search.rs` — search tools (grep, find, similar, etc.) + +use std::collections::{HashMap, HashSet}; + +use vectorless_document::{Document, NodeId}; +use vectorless_error::{Error, Result}; + +use crate::resolve::resolve_target_extended; +use crate::subtree::collect_subtree; +use crate::types::*; + +/// Navigation state machine over a single understood document. +/// +/// Created from a [`Document`] (produced by the compile pipeline). +/// Python Worker holds one `DocumentNavigator` and calls navigation methods +/// to traverse the document tree, collect evidence, and query indexes. +pub struct DocumentNavigator { + doc: Document, + cursor: NodeId, + breadcrumb: Vec, + /// Navigation history stack for `back()`. Pushed on every cd/cd_by_title. + history: Vec, + node_id_map: HashMap, + visited: HashSet, + collected: HashSet, + evidence: Vec, +} + +impl DocumentNavigator { + /// Create a new navigator starting at the document root. 
+ pub fn new(doc: Document) -> Self { + let cursor = doc.tree.root(); + let mut node_id_map = HashMap::new(); + for id in doc.tree.traverse() { + node_id_map.insert(usize::from(id.0) as u64, id); + } + Self { + doc, + cursor, + breadcrumb: vec!["root".to_string()], + history: Vec::new(), + node_id_map, + visited: HashSet::new(), + collected: HashSet::new(), + evidence: Vec::new(), + } + } + + // ----------------------------------------------------------------------- + // NodeId bridge + // ----------------------------------------------------------------------- + + fn parse_id(&self, s: &str) -> Result { + let num: u64 = s + .strip_prefix('n') + .ok_or_else(|| Error::InvalidInput(format!("NodeId must start with 'n', got: {s}")))? + .parse() + .map_err(|_| Error::InvalidInput(format!("Invalid NodeId: {s}")))?; + self.node_id_map + .get(&num) + .copied() + .ok_or_else(|| Error::NodeNotFound(format!("n{num}"))) + } + + fn id_to_u64(&self, id: NodeId) -> u64 { + usize::from(id.0) as u64 + } + + fn resolve_optional_id(&self, opt: Option<&str>) -> Result { + match opt { + Some(s) => self.parse_id(s), + None => Ok(self.cursor), + } + } + + // ----------------------------------------------------------------------- + // Navigation + // ----------------------------------------------------------------------- + + /// List children of the current node with rich metadata. 
+ pub async fn ls(&self) -> Vec { + let routes = self.doc.nav_index.get_child_routes(self.cursor); + match routes { + Some(routes) => routes + .iter() + .map(|route| { + let child_count = self.doc.tree.children(route.node_id).len(); + let (hints, tags, leaf_count) = self + .doc + .nav_index + .get_entry(route.node_id) + .map(|e| (e.question_hints.clone(), e.topic_tags.clone(), e.leaf_count)) + .unwrap_or_default(); + let depth = self.doc.tree.depth(route.node_id); + NodeInfo { + id: self.id_to_u64(route.node_id), + title: route.title.clone(), + depth, + child_count, + leaf_count, + question_hints: hints, + topic_tags: tags, + } + }) + .collect(), + None => Vec::new(), + } + } + + /// Navigate to a specific node by numeric id. + pub async fn cd(&mut self, node_id: &str) -> Result<()> { + let id = self.parse_id(node_id)?; + self.visited.insert(id); + let title = self + .doc + .tree + .get(id) + .map(|n| n.title.as_str()) + .unwrap_or("unknown") + .to_string(); + self.history.push(self.cursor); + self.cursor = id; + self.breadcrumb.push(title); + Ok(()) + } + + /// Navigate to a child by title (fuzzy matching via resolve_target_extended). + pub async fn cd_by_title(&mut self, title: &str) -> Result<()> { + let id = resolve_target_extended(title, &self.doc.nav_index, self.cursor, &self.doc.tree) + .ok_or_else(|| { + Error::NodeNotFound(format!( + "Target '{title}' not found. Use ls to see children." + )) + })?; + let resolved_title = self + .doc + .tree + .get(id) + .map(|n| n.title.as_str()) + .unwrap_or(title) + .to_string(); + self.visited.insert(id); + self.history.push(self.cursor); + self.cursor = id; + self.breadcrumb.push(resolved_title); + Ok(()) + } + + /// Navigate up to the parent of the current node. 
+ pub async fn cd_up(&mut self) -> Result<()> { + if self.breadcrumb.len() <= 1 { + return Err(Error::InvalidInput("Already at root.".into())); + } + let parent = self + .doc + .tree + .parent(self.cursor) + .ok_or_else(|| Error::NodeNotFound("No parent.".into()))?; + self.breadcrumb.pop(); + self.cursor = parent; + Ok(()) + } + + /// Navigate back to the root node. + pub async fn cd_root(&mut self) { + self.cursor = self.doc.tree.root(); + self.breadcrumb = vec!["root".to_string()]; + } + + /// Go back to the previous position (uses navigation history stack). + pub async fn back(&mut self) -> Result<()> { + let prev = self + .history + .pop() + .ok_or_else(|| Error::InvalidInput("No previous position.".into()))?; + self.cursor = prev; + self._rebuild_breadcrumb(); + Ok(()) + } + + /// Return the current navigation path (e.g., "root / Chapter 1 / Section 1.2"). + pub async fn pwd(&self) -> String { + self.breadcrumb.join(" / ") + } + + // ----------------------------------------------------------------------- + // Content + // ----------------------------------------------------------------------- + + /// Read a node's content and collect it as evidence. + /// `node_id` is `"n42"` or None for current node. + pub async fn cat(&mut self, node_id: Option<&str>) -> Result { + let id = self.resolve_optional_id(node_id)?; + let node = self + .doc + .tree + .get(id) + .ok_or_else(|| Error::NodeNotFound("Node not found.".into()))?; + + let title = node.title.clone(); + let content = node.content.clone(); + + if !content.is_empty() && !self.collected.contains(&id) { + let source_path = self.breadcrumb.join(" / ") + " / " + &title; + self.evidence.push(CollectedEvidence { + node_id: self.id_to_u64(id), + title: title.clone(), + content: content.clone(), + source_path, + }); + self.collected.insert(id); + } + self.visited.insert(id); + + Ok(content) + } + + /// Preview the first N lines of a node without collecting evidence. 
+ pub async fn head(&self, node_id: Option<&str>, n: usize) -> Result { + let id = self.resolve_optional_id(node_id)?; + let node = self + .doc + .tree + .get(id) + .ok_or_else(|| Error::NodeNotFound("Node not found.".into()))?; + + let content = &node.content; + let title = &node.title; + let total_lines = content.lines().count(); + let preview: Vec<&str> = content.lines().take(n).collect(); + + let mut output = format!( + "[Preview: {title} — showing {}/{total_lines} lines]\n", + preview.len().min(n) + ); + output.push_str(&preview.join("\n")); + + if total_lines > n { + output.push_str(&format!( + "\n... ({} more lines, use cat to read all)", + total_lines - n + )); + } + + Ok(output) + } + + /// Count lines, words, and characters in a node's content. + pub async fn wc(&self, node_id: Option<&str>) -> Result { + let id = self.resolve_optional_id(node_id)?; + let content = self + .doc + .tree + .get(id) + .map(|n| n.content.as_str()) + .ok_or_else(|| Error::NodeNotFound("Node not found.".into()))?; + + Ok(WordCount { + lines: content.lines().count(), + words: content.split_whitespace().count(), + chars: content.len(), + }) + } + + // ----------------------------------------------------------------------- + // Metadata + // ----------------------------------------------------------------------- + + /// Document-level summary. + pub async fn summary(&self) -> &str { + &self.doc.summary + } + + /// Number of sections in the tree. + pub async fn section_count(&self) -> usize { + self.doc.section_count + } + + /// Document ID. + pub async fn doc_id(&self) -> &str { + &self.doc.doc_id + } + + /// Document name. + pub async fn doc_name(&self) -> &str { + &self.doc.name + } + + // ----------------------------------------------------------------------- + // Evidence + // ----------------------------------------------------------------------- + + /// Explicitly collect evidence from a node. 
+ pub async fn collect_evidence(&mut self, node_id: &str) -> Result<()> { + let id = self.parse_id(node_id)?; + if self.collected.contains(&id) { + return Ok(()); + } + let node = self + .doc + .tree + .get(id) + .ok_or_else(|| Error::NodeNotFound("Node not found.".into()))?; + + let title = node.title.clone(); + let content = node.content.clone(); + if !content.is_empty() { + let source_path = self.breadcrumb.join(" / ") + " / " + &title; + self.evidence.push(CollectedEvidence { + node_id: self.id_to_u64(id), + title, + content, + source_path, + }); + } + self.collected.insert(id); + self.visited.insert(id); + Ok(()) + } + + /// Return all collected evidence. + pub async fn evidence(&self) -> &[CollectedEvidence] { + &self.evidence + } + + /// Clear all collected evidence. + pub async fn clear_evidence(&mut self) { + self.evidence.clear(); + self.collected.clear(); + } + + // ----------------------------------------------------------------------- + // Tree inspection + // ----------------------------------------------------------------------- + + /// Root node id. + pub async fn root_id(&self) -> u64 { + self.id_to_u64(self.doc.tree.root()) + } + + /// Current cursor node id. + pub async fn current_id(&self) -> u64 { + self.id_to_u64(self.cursor) + } + + /// List children of an arbitrary node. 
+ pub async fn children_of(&self, node_id: &str) -> Result> { + let id = self.parse_id(node_id)?; + let routes = self.doc.nav_index.get_child_routes(id); + match routes { + Some(routes) => Ok(routes + .iter() + .map(|route| { + let child_count = self.doc.tree.children(route.node_id).len(); + let (hints, tags, leaf_count) = self + .doc + .nav_index + .get_entry(route.node_id) + .map(|e| (e.question_hints.clone(), e.topic_tags.clone(), e.leaf_count)) + .unwrap_or_default(); + let depth = self.doc.tree.depth(route.node_id); + NodeInfo { + id: self.id_to_u64(route.node_id), + title: route.title.clone(), + depth, + child_count, + leaf_count, + question_hints: hints, + topic_tags: tags, + } + }) + .collect()), + None => Ok(Vec::new()), + } + } + + /// Parent of a node. + pub async fn parent_of(&self, node_id: &str) -> Result> { + let id = self.parse_id(node_id)?; + Ok(self.doc.tree.parent(id).map(|p| self.id_to_u64(p))) + } + + /// Depth of a node in the tree. + pub async fn depth_of(&self, node_id: &str) -> Result { + let id = self.parse_id(node_id)?; + Ok(self.doc.tree.depth(id)) + } + + /// Title of a node. + pub async fn node_title(&self, node_id: &str) -> Result { + let id = self.parse_id(node_id)?; + Ok(self + .doc + .tree + .get(id) + .map(|n| n.title.clone()) + .unwrap_or_default()) + } + + /// All node ids in the tree. + pub async fn all_node_ids(&self) -> Vec { + self.doc + .tree + .traverse() + .iter() + .map(|&id| self.id_to_u64(id)) + .collect() + } + + // ----------------------------------------------------------------------- + // Internal helpers + // ----------------------------------------------------------------------- + + /// Rebuild breadcrumb from root to current cursor. 
+ fn _rebuild_breadcrumb(&mut self) { + let path = self.doc.tree.path_from_root(self.cursor); + self.breadcrumb = std::iter::once("root".to_string()) + .chain( + path.iter() + .skip(1) + .filter_map(|&id| self.doc.tree.get(id).map(|n| n.title.clone())), + ) + .collect(); + } +} + +// --------------------------------------------------------------------------- +// Split implementation files +// --------------------------------------------------------------------------- + +include!("navigator_inspection.rs"); +include!("navigator_search.rs"); diff --git a/vectorless-core/vectorless-primitives/src/navigator_inspection.rs b/vectorless-core/vectorless-primitives/src/navigator_inspection.rs new file mode 100644 index 00000000..a0d7d25a --- /dev/null +++ b/vectorless-core/vectorless-primitives/src/navigator_inspection.rs @@ -0,0 +1,187 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 +// +// P1 inspection tools — structural and metadata queries. +// Included into navigator.rs via `include!`. + +impl DocumentNavigator { + // ----------------------------------------------------------------------- + // P1: Inspection tools + // ----------------------------------------------------------------------- + + /// Return the full table of contents as a flat list of entries. 
+ pub async fn toc(&self) -> Vec { + fn walk( + tree: &vectorless_document::DocumentTree, + node_id: NodeId, + depth: usize, + entries: &mut Vec, + ) { + if depth > 0 { + // skip root + let child_count = tree.children(node_id).len(); + let title = tree.get(node_id).map(|n| n.title.clone()).unwrap_or_default(); + let id_u64 = usize::from(node_id.0) as u64; + entries.push(TocEntry { + id: id_u64, + title, + depth, + child_count, + }); + } + for child in tree.children(node_id) { + walk(tree, child, depth + 1, entries); + } + } + let mut entries = Vec::new(); + walk(&self.doc.tree, self.doc.tree.root(), 0, &mut entries); + entries + } + + /// Get statistics about a node (or the current node if None). + pub async fn stats(&self, node_id: Option<&str>) -> Result { + let id = self.resolve_optional_id(node_id)?; + let node = self + .doc + .tree + .get(id) + .ok_or_else(|| Error::NodeNotFound("Node not found.".into()))?; + + let children = self.doc.tree.children(id); + let depth = self.doc.tree.depth(id); + let leaf_count = self + .doc + .nav_index + .get_entry(id) + .map(|e| e.leaf_count) + .unwrap_or(0); + + Ok(NodeStats { + id: self.id_to_u64(id), + title: node.title.clone(), + depth, + child_count: children.len(), + leaf_count, + char_count: node.content.len(), + word_count: node.content.split_whitespace().count(), + is_leaf: children.is_empty(), + }) + } + + /// List sibling nodes at the same level as a given node (or current node). 
+ pub async fn siblings(&self, node_id: Option<&str>) -> Result> { + let id = self.resolve_optional_id(node_id)?; + let mut result = Vec::new(); + for sibling_id in self.doc.tree.siblings_iter(id) { + let child_count = self.doc.tree.children(sibling_id).len(); + let (hints, tags, leaf_count) = self + .doc + .nav_index + .get_entry(sibling_id) + .map(|e| (e.question_hints.clone(), e.topic_tags.clone(), e.leaf_count)) + .unwrap_or_default(); + let depth = self.doc.tree.depth(sibling_id); + let title = self + .doc + .tree + .get(sibling_id) + .map(|n| n.title.clone()) + .unwrap_or_default(); + result.push(NodeInfo { + id: self.id_to_u64(sibling_id), + title, + depth, + child_count, + leaf_count, + question_hints: hints, + topic_tags: tags, + }); + } + Ok(result) + } + + /// List ancestors from root to the current (or specified) node, inclusive. + pub async fn ancestors(&self, node_id: Option<&str>) -> Result> { + let id = self.resolve_optional_id(node_id)?; + let path = self.doc.tree.path_from_root(id); + let mut result = Vec::new(); + for path_id in &path { + let child_count = self.doc.tree.children(*path_id).len(); + let (hints, tags, leaf_count) = self + .doc + .nav_index + .get_entry(*path_id) + .map(|e| (e.question_hints.clone(), e.topic_tags.clone(), e.leaf_count)) + .unwrap_or_default(); + let depth = self.doc.tree.depth(*path_id); + let title = self + .doc + .tree + .get(*path_id) + .map(|n| n.title.clone()) + .unwrap_or_default(); + result.push(NodeInfo { + id: self.id_to_u64(*path_id), + title, + depth, + child_count, + leaf_count, + question_hints: hints, + topic_tags: tags, + }); + } + Ok(result) + } + + /// Document-level overview card (title, overview, sections, concepts). 
+ pub async fn doc_card(&self) -> Option { + self.doc.nav_index.doc_card().map(|card| DocCardInfo { + title: card.title.clone(), + overview: card.overview.clone(), + question_hints: card.question_hints.clone(), + topic_tags: card.topic_tags.clone(), + sections: card + .sections + .iter() + .map(|s| SectionCardInfo { + title: s.title.clone(), + description: s.description.clone(), + leaf_count: s.leaf_count, + }) + .collect(), + total_leaves: card.total_leaves, + }) + } + + /// Key concepts extracted from the document. + pub async fn concepts(&self) -> Vec { + self.doc + .concepts + .iter() + .map(|c| ConceptInfo { + name: c.name.clone(), + summary: c.summary.clone(), + sections: c.sections.clone(), + }) + .collect() + } + + /// Find a section by exact title (case-insensitive). + pub async fn find_section(&self, title: &str) -> Option { + let id = self.doc.reasoning_index.find_section(title)?; + let node = self.doc.tree.get(id)?; + let depth = self.doc.tree.depth(id); + let leaf_count = self + .doc + .nav_index + .get_entry(id) + .map(|e| e.leaf_count) + .unwrap_or(0); + Some(FindResult { + node_id: self.id_to_u64(id), + title: node.title.clone(), + depth, + leaf_count, + }) + } +} diff --git a/vectorless-core/vectorless-primitives/src/navigator_search.rs b/vectorless-core/vectorless-primitives/src/navigator_search.rs new file mode 100644 index 00000000..1aa6762a --- /dev/null +++ b/vectorless-core/vectorless-primitives/src/navigator_search.rs @@ -0,0 +1,251 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 +// +// Search tools — content search and reasoning index queries. +// Included into navigator.rs via `include!`. + +impl DocumentNavigator { + // ----------------------------------------------------------------------- + // Content search + // ----------------------------------------------------------------------- + + /// Regex search across all node content in the current subtree. + /// Returns up to 30 matches. 
+ pub async fn grep(&self, pattern: &str) -> Result> { + let re = regex::Regex::new(pattern) + .map_err(|e| Error::InvalidInput(format!("Invalid regex '{pattern}': {e}")))?; + + let subtree = collect_subtree(self.cursor, &self.doc.tree); + let mut results = Vec::new(); + let max_matches = 30; + + for node_id in &subtree { + if results.len() >= max_matches { + break; + } + let content = match self.doc.tree.get(*node_id).map(|n| n.content.as_str()) { + Some(c) if !c.is_empty() => c, + _ => continue, + }; + let title = self + .doc + .tree + .get(*node_id) + .map(|n| n.title.as_str()) + .unwrap_or("?"); + + for (i, line) in content.lines().enumerate() { + if results.len() >= max_matches { + break; + } + if re.is_match(line) { + results.push(MatchResult { + node_id: self.id_to_u64(*node_id), + title: title.to_string(), + snippet: line.to_string(), + line_number: i + 1, + }); + } + } + } + + Ok(results) + } + + /// Search for nodes by keyword in title or content (case-insensitive). + pub async fn find(&self, keyword: &str) -> Vec { + let kw = keyword.to_lowercase(); + self.doc + .tree + .traverse() + .iter() + .filter_map(|&id| { + let node = self.doc.tree.get(id)?; + if node.title.to_lowercase().contains(&kw) + || node.content.to_lowercase().contains(&kw) + { + let depth = self.doc.tree.depth(id); + let leaf_count = self + .doc + .nav_index + .get_entry(id) + .map(|e| e.leaf_count) + .unwrap_or(0); + Some(FindResult { + node_id: self.id_to_u64(id), + title: node.title.clone(), + depth, + leaf_count, + }) + } else { + None + } + }) + .collect() + } + + /// Search within a specific node's content without moving the cursor. 
+ pub async fn grep_node( + &self, + node_id: &str, + pattern: &str, + ) -> Result> { + let id = self.parse_id(node_id)?; + let re = regex::Regex::new(pattern) + .map_err(|e| Error::InvalidInput(format!("Invalid regex '{pattern}': {e}")))?; + + let node = self + .doc + .tree + .get(id) + .ok_or_else(|| Error::NodeNotFound("Node not found.".into()))?; + + let title = node.title.clone(); + let content = &node.content; + let mut results = Vec::new(); + + for (i, line) in content.lines().enumerate() { + if results.len() >= 30 { + break; + } + if re.is_match(line) { + results.push(MatchResult { + node_id: self.id_to_u64(id), + title: title.clone(), + snippet: line.to_string(), + line_number: i + 1, + }); + } + } + + Ok(results) + } + + /// Find semantically similar nodes using the reasoning index. + pub async fn similar(&self, node_id: &str) -> Vec { + let id = match self.parse_id(node_id) { + Ok(id) => id, + Err(_) => return Vec::new(), + }; + + // Reverse lookup: find all keywords that point to the reference node + let ref_id_u64 = self.id_to_u64(id); + let mut ref_keywords: Vec = Vec::new(); + for (kw, entries) in self.doc.reasoning_index.all_topic_entries() { + if entries.iter().any(|e| self.id_to_u64(e.node_id) == ref_id_u64) { + ref_keywords.push(kw.clone()); + } + } + + if ref_keywords.is_empty() { + return Vec::new(); + } + + // Find all nodes that share keywords with the reference + let mut candidates: HashMap)> = HashMap::new(); + for kw in &ref_keywords { + if let Some(entries) = self.doc.reasoning_index.topic_entries(kw) { + for entry in entries { + let cid = self.id_to_u64(entry.node_id); + if cid == ref_id_u64 { + continue; + } + let (weight, keywords) = candidates.entry(cid).or_insert((0.0, Vec::new())); + *weight += entry.weight; + keywords.push(kw.clone()); + } + } + } + + let mut results: Vec = candidates + .into_iter() + .filter_map(|(cid, (weight, shared))| { + let nav_id = self.node_id_map.get(&cid)?; + let title = self.doc.tree.get(*nav_id).map(|n| 
n.title.clone())?; + Some(SimilarResult { + id: cid, + title, + relevance: weight, + shared_keywords: shared, + }) + }) + .collect(); + + results.sort_by(|a, b| { + b.relevance + .partial_cmp(&a.relevance) + .unwrap_or(std::cmp::Ordering::Equal) + }); + results.truncate(10); + results + } + + /// Get the pre-computed overview for a section from the navigation index. + pub async fn section_overview(&self, node_id: &str) -> Result { + let id = self.parse_id(node_id)?; + let entry = self + .doc + .nav_index + .get_entry(id) + .ok_or_else(|| Error::NodeNotFound("No nav entry for this node.".into()))?; + Ok(entry.overview.clone()) + } + + // ----------------------------------------------------------------------- + // Reasoning index queries + // ----------------------------------------------------------------------- + + /// Look up topic entries for a keyword in the reasoning index. + pub async fn keyword_entries(&self, keyword: &str) -> Vec { + self.doc + .reasoning_index + .topic_entries(keyword) + .map(|entries| { + entries + .iter() + .map(|e| TopicEntryInfo { + node_id: self.id_to_u64(e.node_id), + weight: e.weight, + depth: e.depth, + }) + .collect() + }) + .unwrap_or_default() + } + + /// Section summaries from the reasoning index. + pub async fn topic_summary(&self) -> Vec { + self.doc + .reasoning_index + .summary_shortcut() + .map(|sc| { + sc.section_summaries + .iter() + .map(|s| SectionSummaryInfo { + node_id: self.id_to_u64(s.node_id), + title: s.title.clone(), + summary: s.summary.clone(), + depth: s.depth, + }) + .collect() + }) + .unwrap_or_default() + } + + /// Find sections related to any of the given keywords. 
+ pub async fn related_sections(&self, keywords: &[String]) -> Vec { + let mut seen = HashSet::new(); + let mut result = Vec::new(); + for kw in keywords { + if let Some(entries) = self.doc.reasoning_index.topic_entries(kw) { + for entry in entries { + let id = self.id_to_u64(entry.node_id); + if seen.insert(id) { + result.push(id); + } + } + } + } + result + } +} diff --git a/vectorless-core/vectorless-primitives/src/resolve.rs b/vectorless-core/vectorless-primitives/src/resolve.rs new file mode 100644 index 00000000..5bffe649 --- /dev/null +++ b/vectorless-core/vectorless-primitives/src/resolve.rs @@ -0,0 +1,267 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Target resolution — map a user-provided string to a NodeId. +//! +//! Extracted from `vectorless-agent/src/command.rs` (strategy layer). +//! Command parsing stays in agent; only resolution logic lives here. + +use vectorless_document::{DocumentTree, NavigationIndex, NodeId}; + +/// Strip surrounding quotes from a target string. +/// +/// Handles straight quotes (`"`, `'`) and Unicode smart quotes. +pub fn strip_quotes(s: &str) -> String { + let trimmed = s.trim(); + let chars: Vec = trimmed.chars().collect(); + if chars.len() < 2 { + return trimmed.to_string(); + } + let (first, last) = (chars[0], chars[chars.len() - 1]); + let matching = (first == '"' && last == '"') + || (first == '\'' && last == '\'') + || (first == '\u{201c}' && last == '\u{201d}') + || (first == '\u{2018}' && last == '\u{2019}'); + if matching { + trimmed[chars[0].len_utf8()..trimmed.len() - chars[chars.len() - 1].len_utf8()].to_string() + } else { + trimmed.to_string() + } +} + +/// Resolve a target string to a NodeId using multi-level matching. +/// +/// Matching priority: +/// 1. Exact title match +/// 2. Case-insensitive title match +/// 3. Substring (contains) match +/// 4. Numeric index match ("1" → first child, "2" → second, etc.) 
+pub fn resolve_target( + target: &str, + nav_index: &NavigationIndex, + current_node: NodeId, +) -> Option { + let target = strip_quotes(target); + let routes = nav_index.get_child_routes(current_node)?; + + // 1. Exact match + if let Some(r) = routes.iter().find(|r| r.title == target) { + return Some(r.node_id); + } + + // 2. Case-insensitive match + let target_lower = target.to_lowercase(); + if let Some(r) = routes + .iter() + .find(|r| r.title.to_lowercase() == target_lower) + { + return Some(r.node_id); + } + + // 3. Substring (contains) match + if let Some(r) = routes + .iter() + .find(|r| r.title.to_lowercase().contains(&target_lower)) + { + return Some(r.node_id); + } + + // 4. Numeric index match ("1" → first child) + if let Ok(idx) = target.parse::() { + if idx > 0 && idx <= routes.len() { + return Some(routes[idx - 1].node_id); + } + } + + None +} + +/// Resolve a target with additional context from tree node titles. +/// +/// Matching priority: +/// 1. Direct children via NavigationIndex (exact, case-insensitive, substring, numeric) +/// 2. Direct children via TreeNode titles (case-insensitive contains) +/// 3. Deep descendant search (BFS, up to depth 4) +pub fn resolve_target_extended( + target: &str, + nav_index: &NavigationIndex, + current_node: NodeId, + tree: &DocumentTree, +) -> Option { + let target = strip_quotes(target); + // Try the primary resolver first + if let Some(id) = resolve_target(&target, nav_index, current_node) { + return Some(id); + } + + let target_lower = target.to_lowercase(); + + // Extended: check all direct children by their TreeNode titles + for child_id in tree.children_iter(current_node) { + if let Some(node) = tree.get(child_id) { + if node.title.to_lowercase().contains(&target_lower) { + return Some(child_id); + } + } + } + + // Deep search: BFS through descendants up to depth 4. 
+ search_descendants(&target_lower, current_node, tree, 4) +} + +/// BFS search through descendants, returning the shallowest matching NodeId. +fn search_descendants( + target_lower: &str, + start: NodeId, + tree: &DocumentTree, + max_depth: usize, +) -> Option { + let mut queue: Vec<(NodeId, usize)> = vec![(start, 0)]; + + while let Some((node_id, depth)) = queue.pop() { + if depth >= max_depth { + continue; + } + for child_id in tree.children_iter(node_id) { + if let Some(node) = tree.get(child_id) { + if node.title.to_lowercase().contains(target_lower) { + return Some(child_id); + } + } + queue.push((child_id, depth + 1)); + } + } + + None +} + +#[cfg(test)] +mod tests { + use super::*; + use vectorless_document::{ChildRoute, DocumentTree, NavigationIndex}; + + #[test] + fn test_strip_quotes() { + assert_eq!(strip_quotes("\"hello\""), "hello"); + assert_eq!(strip_quotes("'hello'"), "hello"); + assert_eq!(strip_quotes("hello"), "hello"); + } + + #[test] + fn test_resolve_target_exact() { + let mut tree = DocumentTree::new("Root", "root"); + let root = tree.root(); + let c1 = tree.add_child(root, "Getting Started", "gs"); + + let mut nav = NavigationIndex::new(); + nav.add_child_routes( + root, + vec![ChildRoute { + node_id: c1, + title: "Getting Started".into(), + description: "Setup".into(), + leaf_count: 3, + }], + ); + + assert_eq!(resolve_target("Getting Started", &nav, root), Some(c1)); + } + + #[test] + fn test_resolve_target_case_insensitive() { + let mut tree = DocumentTree::new("Root", "root"); + let root = tree.root(); + let c1 = tree.add_child(root, "Getting Started", "gs"); + + let mut nav = NavigationIndex::new(); + nav.add_child_routes( + root, + vec![ChildRoute { + node_id: c1, + title: "Getting Started".into(), + description: "Setup".into(), + leaf_count: 3, + }], + ); + + assert_eq!(resolve_target("getting started", &nav, root), Some(c1)); + } + + #[test] + fn test_resolve_target_numeric() { + let mut tree = DocumentTree::new("Root", "root"); + 
let root = tree.root(); + let c1 = tree.add_child(root, "First", "1"); + let c2 = tree.add_child(root, "Second", "2"); + + let mut nav = NavigationIndex::new(); + nav.add_child_routes( + root, + vec![ + ChildRoute { + node_id: c1, + title: "First".into(), + description: "1".into(), + leaf_count: 1, + }, + ChildRoute { + node_id: c2, + title: "Second".into(), + description: "2".into(), + leaf_count: 1, + }, + ], + ); + + assert_eq!(resolve_target("1", &nav, root), Some(c1)); + assert_eq!(resolve_target("2", &nav, root), Some(c2)); + assert_eq!(resolve_target("3", &nav, root), None); + } + + #[test] + fn test_resolve_target_extended_deep() { + let mut tree = DocumentTree::new("Root", "root"); + let root = tree.root(); + let wrapper = tree.add_child(root, "Wrapper", "w"); + let labs = tree.add_child(wrapper, "Research Labs", "labs"); + let lab_b = tree.add_child(labs, "Lab B", "lb"); + + let mut nav = NavigationIndex::new(); + nav.add_child_routes( + root, + vec![ChildRoute { + node_id: wrapper, + title: "Wrapper".into(), + description: "W".into(), + leaf_count: 2, + }], + ); + nav.add_child_routes( + wrapper, + vec![ChildRoute { + node_id: labs, + title: "Research Labs".into(), + description: "Labs".into(), + leaf_count: 1, + }], + ); + nav.add_child_routes( + labs, + vec![ChildRoute { + node_id: lab_b, + title: "Lab B".into(), + description: "LB".into(), + leaf_count: 1, + }], + ); + + assert_eq!( + resolve_target_extended("Research Labs", &nav, root, &tree), + Some(labs) + ); + assert_eq!( + resolve_target_extended("Lab B", &nav, root, &tree), + Some(lab_b) + ); + } +} diff --git a/vectorless-core/vectorless-primitives/src/subtree.rs b/vectorless-core/vectorless-primitives/src/subtree.rs new file mode 100644 index 00000000..8c729925 --- /dev/null +++ b/vectorless-core/vectorless-primitives/src/subtree.rs @@ -0,0 +1,21 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Subtree traversal helpers. 
+ +use vectorless_document::{DocumentTree, NodeId}; + +/// Collect all NodeIds in the subtree rooted at `node` (inclusive), via DFS. +pub fn collect_subtree(node: NodeId, tree: &DocumentTree) -> Vec { + let mut result = vec![node]; + let mut stack = vec![node]; + + while let Some(current) = stack.pop() { + for child in tree.children_iter(current) { + result.push(child); + stack.push(child); + } + } + + result +} diff --git a/vectorless-core/vectorless-primitives/src/types.rs b/vectorless-core/vectorless-primitives/src/types.rs new file mode 100644 index 00000000..4bc37123 --- /dev/null +++ b/vectorless-core/vectorless-primitives/src/types.rs @@ -0,0 +1,187 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Return types for document navigation primitives. + +/// Information about a node in the document tree. +#[derive(Debug, Clone)] +pub struct NodeInfo { + /// Numeric identifier (usable as `"n{id}"` in Python). + pub id: u64, + /// Section title. + pub title: String, + /// Depth in the tree (0 = root). + pub depth: usize, + /// Number of direct children. + pub child_count: usize, + /// Number of leaf descendants. + pub leaf_count: usize, + /// Questions this subtree can answer. + pub question_hints: Vec, + /// Topic tags for routing. + pub topic_tags: Vec, +} + +/// A regex match within a node's content. +#[derive(Debug, Clone)] +pub struct MatchResult { + /// Node containing the match. + pub node_id: u64, + /// Title of the matched node. + pub title: String, + /// The matching line of content. + pub snippet: String, + /// 1-based line number within the node's content. + pub line_number: usize, +} + +/// A node found by title or content search. +#[derive(Debug, Clone)] +pub struct FindResult { + /// Numeric identifier. + pub node_id: u64, + /// Section title. + pub title: String, + /// Depth in the tree. + pub depth: usize, + /// Number of leaf descendants. 
+    pub leaf_count: usize,
+}
+
+/// Word/line/character count for a node's content.
+#[derive(Debug, Clone)]
+pub struct WordCount {
+    /// Number of lines.
+    pub lines: usize,
+    /// Number of whitespace-separated words.
+    pub words: usize,
+    /// Number of characters.
+    pub chars: usize,
+}
+
+/// Evidence collected from a node during navigation.
+#[derive(Debug, Clone)]
+pub struct CollectedEvidence {
+    /// Node the evidence was collected from.
+    pub node_id: u64,
+    /// Title of the node.
+    pub title: String,
+    /// Full content of the node.
+    pub content: String,
+    /// Navigation path (e.g., "root / Chapter 1 / Section 1.2").
+    pub source_path: String,
+}
+
+/// A topic entry from the reasoning index.
+#[derive(Debug, Clone)]
+pub struct TopicEntryInfo {
+    /// Node associated with this entry.
+    pub node_id: u64,
+    /// Relevance weight.
+    pub weight: f32,
+    /// Depth in the tree.
+    pub depth: usize,
+}
+
+/// A section summary from the reasoning index.
+#[derive(Debug, Clone)]
+pub struct SectionSummaryInfo {
+    /// Node this summary belongs to.
+    pub node_id: u64,
+    /// Section title.
+    pub title: String,
+    /// LLM-generated summary.
+    pub summary: String,
+    /// Depth in the tree.
+    pub depth: usize,
+}
+
+// ---------------------------------------------------------------------------
+// P1: New types for extended agent tools
+// ---------------------------------------------------------------------------
+
+/// A single entry in the table of contents.
+#[derive(Debug, Clone)]
+pub struct TocEntry {
+    /// Numeric identifier (usable as `"n{id}"` in Python).
+    pub id: u64,
+    /// Section title.
+    pub title: String,
+    /// Depth in the tree (1 = top-level section, 0 = root which is skipped).
+    pub depth: usize,
+    /// Number of direct children.
+    pub child_count: usize,
+}
+
+/// Statistics about a single node.
+#[derive(Debug, Clone)]
+pub struct NodeStats {
+    /// Numeric identifier.
+    pub id: u64,
+    /// Section title.
+    pub title: String,
+    /// Depth in the tree.
+    pub depth: usize,
+    /// Number of direct children.
+    pub child_count: usize,
+    /// Number of leaf descendants.
+    pub leaf_count: usize,
+    /// Byte length of the node's content (`String::len`), not the Unicode
+    /// character count.
+    pub char_count: usize,
+    /// Word count of the node's content.
+    pub word_count: usize,
+    /// Whether this node has no children.
+    pub is_leaf: bool,
+}
+
+/// A node found by semantic similarity.
+#[derive(Debug, Clone)]
+pub struct SimilarResult {
+    /// Numeric identifier.
+    pub id: u64,
+    /// Combined relevance score: the sum of the weights of keywords shared
+    /// with the reference node (higher = more similar).
+    pub title: String,
+    /// Combined relevance score.
+    pub relevance: f32,
+    /// Keywords shared with the reference node.
+    pub shared_keywords: Vec<String>,
+}
+
+/// One top-level section in a [`DocCardInfo`].
+#[derive(Debug, Clone)]
+pub struct SectionCardInfo {
+    /// Section title.
+    pub title: String,
+    /// One-sentence description of this section.
+    pub description: String,
+    /// Number of leaf nodes in this section's subtree.
+    pub leaf_count: usize,
+}
+
+/// Document-level overview card.
+#[derive(Debug, Clone)]
+pub struct DocCardInfo {
+    /// Document title.
+    pub title: String,
+    /// Document overview summary.
+    pub overview: String,
+    /// Questions this document can answer.
+    pub question_hints: Vec<String>,
+    /// Topic keywords.
+    pub topic_tags: Vec<String>,
+    /// Top-level section summaries.
+    pub sections: Vec<SectionCardInfo>,
+    /// Total leaf nodes in the document.
+    pub total_leaves: usize,
+}
+
+/// A key concept extracted from the document.
+#[derive(Debug, Clone)]
+pub struct ConceptInfo {
+    /// Concept name (e.g., "capacitor derating").
+    pub name: String,
+    /// One-sentence explanation.
+    pub summary: String,
+    /// Which sections this concept appears in.
+ pub sections: Vec, +} diff --git a/vectorless-core/vectorless-py/Cargo.toml b/vectorless-core/vectorless-py/Cargo.toml index 967540ce..9c7b66a1 100644 --- a/vectorless-core/vectorless-py/Cargo.toml +++ b/vectorless-core/vectorless-py/Cargo.toml @@ -15,8 +15,9 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { workspace = true } pyo3-async-runtimes = { workspace = true } -tokio = { version = "1", features = ["rt-multi-thread"] } +tokio = { version = "1", features = ["rt-multi-thread", "sync"] } vectorless-engine = { path = "../vectorless-engine" } +vectorless-primitives = { path = "../vectorless-primitives" } [lints] workspace = true diff --git a/vectorless-core/vectorless-py/src/document.rs b/vectorless-core/vectorless-py/src/document.rs index af200d02..38959868 100644 --- a/vectorless-core/vectorless-py/src/document.rs +++ b/vectorless-core/vectorless-py/src/document.rs @@ -1,16 +1,30 @@ // Copyright (c) 2026 vectorless developers // SPDX-License-Identifier: Apache-2.0 -//! DocumentInfo Python wrapper. +//! Document types for Python bindings. + +use std::sync::Arc; use pyo3::prelude::*; +use pyo3_async_runtimes::tokio::future_into_py; +use tokio::sync::Mutex; + +use vectorless_primitives::{ + CollectedEvidence, ConceptInfo, DocCardInfo, DocumentNavigator, FindResult, MatchResult, + NodeInfo, NodeStats, SectionCardInfo, SectionSummaryInfo, SimilarResult, TocEntry, + TopicEntryInfo, WordCount, +}; -use ::vectorless_engine::DocumentInfo; +use super::error::VectorlessError; + +// ========================================================================= +// PyDocumentInfo (existing — returned by ingest) +// ========================================================================= /// Information about an understood document. 
#[pyclass(name = "DocumentInfo")] pub struct PyDocumentInfo { - pub(crate) inner: DocumentInfo, + pub(crate) inner: vectorless_engine::DocumentInfo, } #[pymethods] @@ -76,3 +90,868 @@ pub struct PyConcept { #[pyo3(get)] pub sections: Vec, } + +// ========================================================================= +// PyDocument — full navigable document +// ========================================================================= + +/// A navigable document with cursor state, evidence collection, and search. +/// +/// All methods are **async** — use `await` to call them. +/// +/// ```python +/// doc = await engine.load_document(doc_id) +/// children = await doc.ls() +/// await doc.cd(children[0].id) +/// print(await doc.pwd()) +/// print(await doc.cat(None)) +/// ``` +#[pyclass(name = "Document")] +pub struct PyDocument { + inner: Arc>, +} + +impl PyDocument { + /// Create a PyDocument from a DocumentNavigator. + pub fn from_navigator(nav: DocumentNavigator) -> Self { + Self { + inner: Arc::new(Mutex::new(nav)), + } + } +} + +// Helper: convert u64 id to Python string "n{id}" +fn id_to_str(id: u64) -> String { + format!("n{id}") +} + +fn to_py_err(e: impl std::fmt::Display) -> PyErr { + PyErr::from(VectorlessError::new(e.to_string(), "navigation")) +} + +#[pymethods] +impl PyDocument { + // ── Navigation ────────────────────────────────────────────────────── + + /// List children of the current node. + fn ls<'py>(&self, py: Python<'py>) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + let result = nav.ls().await; + Ok(result.into_iter().map(PyNodeInfo::from).collect::>()) + }) + } + + /// Navigate to a node by its id string (e.g., "n42"). 
+    fn cd<'py>(&self, py: Python<'py>, node_id: String) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            // tokio `Mutex` — acquiring it is always exclusive; `&mut` here
+            // because navigation moves the cursor.
+            let mut nav = nav.lock().await;
+            nav.cd(&node_id).await.map_err(to_py_err)
+        })
+    }
+
+    /// Navigate to a child by title (fuzzy matching).
+    fn cd_by_title<'py>(&self, py: Python<'py>, title: String) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let mut nav = nav.lock().await;
+            nav.cd_by_title(&title).await.map_err(to_py_err)
+        })
+    }
+
+    /// Navigate up to the parent node.
+    fn cd_up<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let mut nav = nav.lock().await;
+            nav.cd_up().await.map_err(to_py_err)
+        })
+    }
+
+    /// Navigate back to the root node.
+    fn cd_root<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let mut nav = nav.lock().await;
+            // Infallible — unlike `cd`/`cd_up`, the root always exists.
+            nav.cd_root().await;
+            Ok(())
+        })
+    }
+
+    /// Return the current navigation path.
+    fn pwd<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            Ok(nav.pwd().await)
+        })
+    }
+
+    // ── Content ───────────────────────────────────────────────────────────
+
+    /// Read node content and collect as evidence. None = current node.
+    #[pyo3(signature = (node_id=None))]
+    fn cat<'py>(&self, py: Python<'py>, node_id: Option<String>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            // `&mut` because `cat` records evidence as a side effect.
+            let mut nav = nav.lock().await;
+            nav.cat(node_id.as_deref()).await.map_err(to_py_err)
+        })
+    }
+
+    /// Regex search across the current subtree. Returns up to 30 matches.
+    fn grep<'py>(&self, py: Python<'py>, pattern: String) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            // Errors (e.g. invalid regex) surface as VectorlessError
+            // (category "navigation") via `to_py_err`.
+            nav.grep(&pattern)
+                .await
+                .map(|r| r.into_iter().map(PyMatchResult::from).collect::<Vec<_>>())
+                .map_err(to_py_err)
+        })
+    }
+
+    /// Search for nodes by keyword in title or content (case-insensitive).
+    fn find<'py>(&self, py: Python<'py>, keyword: String) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            Ok(nav
+                .find(&keyword)
+                .await
+                .into_iter()
+                .map(PyFindResult::from)
+                .collect::<Vec<_>>())
+        })
+    }
+
+    /// Preview the first N lines of a node without collecting evidence.
+    #[pyo3(signature = (node_id=None, n=10))]
+    fn head<'py>(
+        &self,
+        py: Python<'py>,
+        node_id: Option<String>,
+        n: usize,
+    ) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            nav.head(node_id.as_deref(), n).await.map_err(to_py_err)
+        })
+    }
+
+    /// Count lines, words, and characters in a node's content.
+    #[pyo3(signature = (node_id=None))]
+    fn wc<'py>(&self, py: Python<'py>, node_id: Option<String>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            nav.wc(node_id.as_deref())
+                .await
+                .map(PyWordCount::from)
+                .map_err(to_py_err)
+        })
+    }
+
+    // ── Metadata ──────────────────────────────────────────────────────────
+
+    /// Document-level summary.
+    fn summary<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            Ok(nav.summary().await.to_string())
+        })
+    }
+
+    /// Number of sections in the tree.
+    fn section_count<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            Ok(nav.section_count().await)
+        })
+    }
+
+    /// Document ID.
+    fn doc_id<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            Ok(nav.doc_id().await.to_string())
+        })
+    }
+
+    /// Document name.
+    fn doc_name<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            Ok(nav.doc_name().await.to_string())
+        })
+    }
+
+    // ── Reasoning Index ───────────────────────────────────────────────────
+
+    /// Look up topic entries for a keyword.
+    fn keyword_entries<'py>(
+        &self,
+        py: Python<'py>,
+        keyword: String,
+    ) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            Ok(nav
+                .keyword_entries(&keyword)
+                .await
+                .into_iter()
+                .map(PyTopicEntry::from)
+                .collect::<Vec<_>>())
+        })
+    }
+
+    /// Section summaries from the reasoning index.
+    fn topic_summary<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            Ok(nav
+                .topic_summary()
+                .await
+                .into_iter()
+                .map(PySectionSummary::from)
+                .collect::<Vec<_>>())
+        })
+    }
+
+    /// Find sections related to any of the given keywords.
+    fn related_sections<'py>(
+        &self,
+        py: Python<'py>,
+        keywords: Vec<String>,
+    ) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            // Ids are rendered in the Python-facing "n{id}" form.
+            Ok(nav
+                .related_sections(&keywords)
+                .await
+                .into_iter()
+                .map(id_to_str)
+                .collect::<Vec<_>>())
+        })
+    }
+
+    // ── Evidence ──────────────────────────────────────────────────────────
+
+    /// Explicitly collect evidence from a node.
+    fn collect_evidence<'py>(
+        &self,
+        py: Python<'py>,
+        node_id: String,
+    ) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let mut nav = nav.lock().await;
+            nav.collect_evidence(&node_id).await.map_err(to_py_err)
+        })
+    }
+
+    /// Return all collected evidence.
+    fn evidence<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            // Clone out of the navigator so the lock is not held by Python.
+            Ok(nav
+                .evidence()
+                .await
+                .iter()
+                .cloned()
+                .map(PyCollectedEvidence::from)
+                .collect::<Vec<_>>())
+        })
+    }
+
+    /// Clear all collected evidence.
+    fn clear_evidence<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let mut nav = nav.lock().await;
+            nav.clear_evidence().await;
+            Ok(())
+        })
+    }
+
+    // ── Tree inspection ───────────────────────────────────────────────────
+
+    /// Root node id.
+    fn root_id<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            Ok(id_to_str(nav.root_id().await))
+        })
+    }
+
+    /// Current cursor node id.
+    fn current_id<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            Ok(id_to_str(nav.current_id().await))
+        })
+    }
+
+    /// List children of an arbitrary node.
+    fn children_of<'py>(&self, py: Python<'py>, node_id: String) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            nav.children_of(&node_id)
+                .await
+                .map(|r| r.into_iter().map(PyNodeInfo::from).collect::<Vec<_>>())
+                .map_err(to_py_err)
+        })
+    }
+
+    /// Parent of a node.
+    fn parent_of<'py>(&self, py: Python<'py>, node_id: String) -> PyResult<Bound<'py, PyAny>> {
+        let nav = Arc::clone(&self.inner);
+        future_into_py(py, async move {
+            let nav = nav.lock().await;
+            nav.parent_of(&node_id)
+                .await
+                .map(|opt| opt.map(id_to_str))
+                .map_err(to_py_err)
+        })
+    }
+
+    /// Depth of a node in the tree.
+ fn depth_of<'py>(&self, py: Python<'py>, node_id: String) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + nav.depth_of(&node_id).await.map_err(to_py_err) + }) + } + + /// Title of a node. + fn node_title<'py>(&self, py: Python<'py>, node_id: String) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + nav.node_title(&node_id).await.map_err(to_py_err) + }) + } + + /// All node ids in the tree. + fn all_node_ids<'py>(&self, py: Python<'py>) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + Ok(nav + .all_node_ids() + .await + .into_iter() + .map(id_to_str) + .collect::>()) + }) + } + + // ── P1: Extended tools ──────────────────────────────────────────── + + /// Go back to the previous position (navigation history). + fn back<'py>(&self, py: Python<'py>) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let mut nav = nav.lock().await; + nav.back().await.map_err(to_py_err) + }) + } + + /// Return the full table of contents. + fn toc<'py>(&self, py: Python<'py>) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + Ok(nav + .toc() + .await + .into_iter() + .map(PyTocEntry::from) + .collect::>()) + }) + } + + /// Get statistics about a node (or current node if None). + #[pyo3(signature = (node_id=None))] + fn stats<'py>(&self, py: Python<'py>, node_id: Option) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + nav.stats(node_id.as_deref()) + .await + .map(PyNodeStats::from) + .map_err(to_py_err) + }) + } + + /// Search within a specific node's content (no cursor movement). 
+ fn grep_node<'py>( + &self, + py: Python<'py>, + node_id: String, + pattern: String, + ) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + nav.grep_node(&node_id, &pattern) + .await + .map(|r| r.into_iter().map(PyMatchResult::from).collect::>()) + .map_err(to_py_err) + }) + } + + /// Find semantically similar nodes. + fn similar<'py>(&self, py: Python<'py>, node_id: String) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + Ok(nav + .similar(&node_id) + .await + .into_iter() + .map(PySimilarResult::from) + .collect::>()) + }) + } + + /// Get the pre-computed overview for a section. + fn section_overview<'py>( + &self, + py: Python<'py>, + node_id: String, + ) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + nav.section_overview(&node_id).await.map_err(to_py_err) + }) + } + + /// List sibling nodes at the same level as a given node. + fn siblings<'py>( + &self, + py: Python<'py>, + node_id: Option, + ) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + nav.siblings(node_id.as_deref()) + .await + .map(|v| v.into_iter().map(PyNodeInfo::from).collect::>()) + .map_err(to_py_err) + }) + } + + /// List ancestors from root to a given node, inclusive. + fn ancestors<'py>( + &self, + py: Python<'py>, + node_id: Option, + ) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + nav.ancestors(node_id.as_deref()) + .await + .map(|v| v.into_iter().map(PyNodeInfo::from).collect::>()) + .map_err(to_py_err) + }) + } + + /// Document-level overview card. 
+ fn doc_card<'py>(&self, py: Python<'py>) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + Ok(nav.doc_card().await.map(PyDocCard::from)) + }) + } + + /// Key concepts extracted from the document. + fn concepts<'py>(&self, py: Python<'py>) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + Ok(nav + .concepts() + .await + .into_iter() + .map(PyConceptInfo::from) + .collect::>()) + }) + } + + /// Find a section by exact title (case-insensitive). + fn find_section<'py>(&self, py: Python<'py>, title: String) -> PyResult> { + let nav = Arc::clone(&self.inner); + future_into_py(py, async move { + let nav = nav.lock().await; + Ok(nav.find_section(&title).await.map(PyFindResult::from)) + }) + } +} + +// ========================================================================= +// Helper types +// ========================================================================= + +/// Information about a node in the document tree. +#[pyclass(name = "NodeInfo")] +#[derive(Clone)] +pub struct PyNodeInfo { + #[pyo3(get)] + pub id: String, + #[pyo3(get)] + pub title: String, + #[pyo3(get)] + pub depth: usize, + #[pyo3(get)] + pub child_count: usize, + #[pyo3(get)] + pub leaf_count: usize, + #[pyo3(get)] + pub question_hints: Vec, + #[pyo3(get)] + pub topic_tags: Vec, +} + +impl From for PyNodeInfo { + fn from(v: NodeInfo) -> Self { + Self { + id: id_to_str(v.id), + title: v.title, + depth: v.depth, + child_count: v.child_count, + leaf_count: v.leaf_count, + question_hints: v.question_hints, + topic_tags: v.topic_tags, + } + } +} + +/// A regex match within node content. 
+#[pyclass(name = "MatchResult")] +#[derive(Clone)] +pub struct PyMatchResult { + #[pyo3(get)] + pub node_id: String, + #[pyo3(get)] + pub title: String, + #[pyo3(get)] + pub snippet: String, + #[pyo3(get)] + pub line_number: usize, +} + +impl From for PyMatchResult { + fn from(v: MatchResult) -> Self { + Self { + node_id: id_to_str(v.node_id), + title: v.title, + snippet: v.snippet, + line_number: v.line_number, + } + } +} + +/// A node found by search. +#[pyclass(name = "FindResult")] +#[derive(Clone)] +pub struct PyFindResult { + #[pyo3(get)] + pub node_id: String, + #[pyo3(get)] + pub title: String, + #[pyo3(get)] + pub depth: usize, + #[pyo3(get)] + pub leaf_count: usize, +} + +impl From for PyFindResult { + fn from(v: FindResult) -> Self { + Self { + node_id: id_to_str(v.node_id), + title: v.title, + depth: v.depth, + leaf_count: v.leaf_count, + } + } +} + +/// Word/line/character count. +#[pyclass(name = "WordCount")] +#[derive(Clone)] +pub struct PyWordCount { + #[pyo3(get)] + pub lines: usize, + #[pyo3(get)] + pub words: usize, + #[pyo3(get)] + pub chars: usize, +} + +impl From for PyWordCount { + fn from(v: WordCount) -> Self { + Self { + lines: v.lines, + words: v.words, + chars: v.chars, + } + } +} + +/// Evidence collected during navigation. +#[pyclass(name = "CollectedEvidence")] +#[derive(Clone)] +pub struct PyCollectedEvidence { + #[pyo3(get)] + pub node_id: String, + #[pyo3(get)] + pub title: String, + #[pyo3(get)] + pub content: String, + #[pyo3(get)] + pub source_path: String, +} + +impl From for PyCollectedEvidence { + fn from(v: CollectedEvidence) -> Self { + Self { + node_id: id_to_str(v.node_id), + title: v.title, + content: v.content, + source_path: v.source_path, + } + } +} + +/// A topic entry from the reasoning index. 
+#[pyclass(name = "TopicEntry")] +#[derive(Clone)] +pub struct PyTopicEntry { + #[pyo3(get)] + pub node_id: String, + #[pyo3(get)] + pub weight: f32, + #[pyo3(get)] + pub depth: usize, +} + +impl From for PyTopicEntry { + fn from(v: TopicEntryInfo) -> Self { + Self { + node_id: id_to_str(v.node_id), + weight: v.weight, + depth: v.depth, + } + } +} + +/// A section summary from the reasoning index. +#[pyclass(name = "SectionSummary")] +#[derive(Clone)] +pub struct PySectionSummary { + #[pyo3(get)] + pub node_id: String, + #[pyo3(get)] + pub title: String, + #[pyo3(get)] + pub summary: String, + #[pyo3(get)] + pub depth: usize, +} + +impl From for PySectionSummary { + fn from(v: SectionSummaryInfo) -> Self { + Self { + node_id: id_to_str(v.node_id), + title: v.title, + summary: v.summary, + depth: v.depth, + } + } +} + +// ========================================================================= +// P1: New helper types +// ========================================================================= + +/// A single entry in the table of contents. +#[pyclass(name = "TocEntry")] +#[derive(Clone)] +pub struct PyTocEntry { + #[pyo3(get)] + pub id: String, + #[pyo3(get)] + pub title: String, + #[pyo3(get)] + pub depth: usize, + #[pyo3(get)] + pub child_count: usize, +} + +impl From for PyTocEntry { + fn from(v: TocEntry) -> Self { + Self { + id: id_to_str(v.id), + title: v.title, + depth: v.depth, + child_count: v.child_count, + } + } +} + +/// Statistics about a node. 
+#[pyclass(name = "NodeStats")] +#[derive(Clone)] +pub struct PyNodeStats { + #[pyo3(get)] + pub id: String, + #[pyo3(get)] + pub title: String, + #[pyo3(get)] + pub depth: usize, + #[pyo3(get)] + pub child_count: usize, + #[pyo3(get)] + pub leaf_count: usize, + #[pyo3(get)] + pub char_count: usize, + #[pyo3(get)] + pub word_count: usize, + #[pyo3(get)] + pub is_leaf: bool, +} + +impl From for PyNodeStats { + fn from(v: NodeStats) -> Self { + Self { + id: id_to_str(v.id), + title: v.title, + depth: v.depth, + child_count: v.child_count, + leaf_count: v.leaf_count, + char_count: v.char_count, + word_count: v.word_count, + is_leaf: v.is_leaf, + } + } +} + +/// A node found by semantic similarity. +#[pyclass(name = "SimilarResult")] +#[derive(Clone)] +pub struct PySimilarResult { + #[pyo3(get)] + pub id: String, + #[pyo3(get)] + pub title: String, + #[pyo3(get)] + pub relevance: f32, + #[pyo3(get)] + pub shared_keywords: Vec, +} + +impl From for PySimilarResult { + fn from(v: SimilarResult) -> Self { + Self { + id: id_to_str(v.id), + title: v.title, + relevance: v.relevance, + shared_keywords: v.shared_keywords, + } + } +} + +/// A top-level section in a document card. +#[pyclass(name = "SectionCard")] +#[derive(Clone)] +pub struct PySectionCard { + #[pyo3(get)] + pub title: String, + #[pyo3(get)] + pub description: String, + #[pyo3(get)] + pub leaf_count: usize, +} + +impl From for PySectionCard { + fn from(v: SectionCardInfo) -> Self { + Self { + title: v.title, + description: v.description, + leaf_count: v.leaf_count, + } + } +} + +/// Document-level overview card. 
+#[pyclass(name = "DocCard")] +#[derive(Clone)] +pub struct PyDocCard { + #[pyo3(get)] + pub title: String, + #[pyo3(get)] + pub overview: String, + #[pyo3(get)] + pub question_hints: Vec, + #[pyo3(get)] + pub topic_tags: Vec, + #[pyo3(get)] + pub sections: Vec, + #[pyo3(get)] + pub total_leaves: usize, +} + +impl From for PyDocCard { + fn from(v: DocCardInfo) -> Self { + Self { + title: v.title, + overview: v.overview, + question_hints: v.question_hints, + topic_tags: v.topic_tags, + sections: v.sections.into_iter().map(PySectionCard::from).collect(), + total_leaves: v.total_leaves, + } + } +} + +/// A key concept extracted from the document. +#[pyclass(name = "ConceptInfo")] +#[derive(Clone)] +pub struct PyConceptInfo { + #[pyo3(get)] + pub name: String, + #[pyo3(get)] + pub summary: String, + #[pyo3(get)] + pub sections: Vec, +} + +impl From for PyConceptInfo { + fn from(v: ConceptInfo) -> Self { + Self { + name: v.name, + summary: v.summary, + sections: v.sections, + } + } +} diff --git a/vectorless-core/vectorless-py/src/engine.rs b/vectorless-core/vectorless-py/src/engine.rs index 66c4e30c..da6e7949 100644 --- a/vectorless-core/vectorless-py/src/engine.rs +++ b/vectorless-core/vectorless-py/src/engine.rs @@ -11,7 +11,7 @@ use tokio::runtime::Runtime; use ::vectorless_engine::{Engine, EngineBuilder, IngestInput}; use super::answer::PyAnswer; -use super::document::PyDocumentInfo; +use super::document::{PyDocument, PyDocumentInfo}; use super::error::VectorlessError; use super::error::to_py_err; use super::graph::PyDocumentGraph; @@ -55,6 +55,24 @@ async fn run_clear(engine: Arc) -> PyResult { engine.clear().await.map_err(to_py_err) } +async fn run_load_document(engine: Arc, doc_id: String) -> PyResult { + let doc = engine.load_document(&doc_id).await.map_err(to_py_err)?; + match doc { + Some(d) => { + let navigator = vectorless_primitives::DocumentNavigator::new(d); + Ok(PyDocument::from_navigator(navigator)) + } + None => Err(PyErr::from(VectorlessError::new( + 
format!("Document not found: {doc_id}"), + "navigation", + ))), + } +} + +async fn run_list_document_ids(engine: Arc) -> PyResult> { + engine.list_document_ids().await.map_err(to_py_err) +} + async fn run_get_graph(engine: Arc) -> PyResult> { let graph = engine.get_graph().await.map_err(to_py_err)?; Ok(graph.map(|g| PyDocumentGraph { inner: g })) @@ -226,6 +244,23 @@ impl PyEngine { future_into_py(py, run_exists(engine, doc_id)) } + /// Load a full navigable Document by ID. + /// + /// Returns a Document with navigation methods (ls, cd, cat, grep, find, etc.). + /// + /// Raises: + /// VectorlessError: If the document is not found or loading fails. + fn load_document<'py>(&self, py: Python<'py>, doc_id: String) -> PyResult> { + let engine = Arc::clone(&self.inner); + future_into_py(py, run_load_document(engine, doc_id)) + } + + /// List all document IDs in the workspace. + fn list_document_ids<'py>(&self, py: Python<'py>) -> PyResult> { + let engine = Arc::clone(&self.inner); + future_into_py(py, run_list_document_ids(engine)) + } + /// Remove all documents. 
/// /// Returns: diff --git a/vectorless-core/vectorless-py/src/lib.rs b/vectorless-core/vectorless-py/src/lib.rs index e6ad77ea..9d1f8b03 100644 --- a/vectorless-core/vectorless-py/src/lib.rs +++ b/vectorless-core/vectorless-py/src/lib.rs @@ -15,7 +15,11 @@ mod metrics; use answer::{PyAnswer, PyEvidence, PyReasoningTrace, PyTraceStep}; use config::PyConfig; -use document::{PyConcept, PyDocumentInfo}; +use document::{ + PyCollectedEvidence, PyConcept, PyConceptInfo, PyDocCard, PyDocument, PyDocumentInfo, + PyFindResult, PyMatchResult, PyNodeInfo, PyNodeStats, PySectionCard, PySectionSummary, + PySimilarResult, PyTocEntry, PyTopicEntry, PyWordCount, +}; use engine::PyEngine; use error::VectorlessError; use graph::{PyDocumentGraph, PyDocumentGraphNode, PyEdgeEvidence, PyGraphEdge, PyWeightedKeyword}; @@ -35,8 +39,22 @@ use metrics::{PyLlmMetricsReport, PyMetricsReport, PyRetrievalMetricsReport}; fn _vectorless(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/vectorless-core/vectorless-storage/src/workspace.rs b/vectorless-core/vectorless-storage/src/workspace.rs new file mode 100644 index 00000000..adb2d4de --- /dev/null +++ b/vectorless-core/vectorless-storage/src/workspace.rs @@ -0,0 +1,669 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Async workspace management for document collections. +//! +//! This module provides the primary workspace implementation for document +//! persistence, using async I/O for integration with runtimes like Tokio. +//! +//! # Features +//! +//! 
- **Async I/O** - All operations are async for non-blocking performance +//! - **LRU Cache** - Automatic caching with configurable size +//! - **Thread-Safe** - Fully thread-safe with `Arc` +//! - **Pluggable Backend** - Use file storage, in-memory, or custom backends +//! +//! # Example +//! +//! ```rust,ignore +//! use vectorless::storage::Workspace; +//! +//! #[tokio::main] +//! async fn main() -> Result<()> { +//! let workspace = Workspace::new("./workspace").await?; +//! +//! // Add a document +//! workspace.add(&doc).await?; +//! +//! // Load with caching +//! let loaded = workspace.load_and_cache("doc-1").await?; +//! +//! Ok(()) +//! } +//! ``` + +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; + +use serde::{Deserialize, Serialize}; +use tokio::sync::RwLock; +use tracing::{debug, info, warn}; + +use super::backend::{FileBackend, StorageBackend}; +use super::cache::DocumentCache; +use super::persistence::{PersistedDocument, load_document_from_bytes, save_document_to_bytes}; +use vectorless_error::Error; +use vectorless_error::Result; + +const META_KEY: &str = "meta"; +const CATALOG_KEY: &str = "catalog"; +const DEFAULT_CACHE_SIZE: usize = 100; + +/// Lightweight metadata entry for the async workspace index. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DocumentMetaEntry { + /// Document ID. + pub id: String, + /// Document name/title. + pub doc_name: String, + /// Document description. + #[serde(default)] + pub doc_description: Option, + /// Document type (pdf, md, etc.). + pub doc_type: String, + /// Source file path. + #[serde(default)] + pub path: Option, + /// Page count (for PDFs). + #[serde(skip_serializing_if = "Option::is_none")] + pub page_count: Option, + /// Line count (for markdown). + #[serde(skip_serializing_if = "Option::is_none")] + pub line_count: Option, +} + +/// Options for async workspace creation. +#[derive(Debug, Clone)] +pub struct WorkspaceOptions { + /// LRU cache size (default: 100). 
+ pub cache_size: usize, +} + +impl Default for WorkspaceOptions { + fn default() -> Self { + Self { + cache_size: DEFAULT_CACHE_SIZE, + } + } +} + +impl WorkspaceOptions { + /// Create new options with defaults. + pub fn new() -> Self { + Self::default() + } + + /// Set the cache size. + pub fn with_cache_size(mut self, size: usize) -> Self { + self.cache_size = size; + self + } +} + +/// Inner state for the async workspace. +struct WorkspaceInner { + /// Storage backend. + backend: Arc, + /// Root path (for file-based backends). + root: Option, + /// Document metadata index. + meta_index: HashMap, + /// DocCard catalog — lightweight document summaries for Orchestrator analysis. + catalog: HashMap, + /// LRU cache for loaded documents. + cache: DocumentCache, + /// Cross-document relationship graph (cached). + document_graph: Option, +} + +/// An async workspace for managing indexed documents. +/// +/// Uses `tokio::sync::RwLock` for async-safe concurrent access. +/// All operations are async and can be safely called from multiple tasks. +/// +/// # Thread Safety +/// +/// The async workspace is fully thread-safe and can be cloned cheaply +/// (it uses `Arc` internally). +#[derive(Clone)] +pub struct Workspace { + inner: Arc>, +} + +impl std::fmt::Debug for Workspace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Workspace").finish() + } +} + +impl Workspace { + /// Create a new async workspace with a storage backend. + pub async fn with_backend(backend: Arc) -> Result { + Self::with_backend_and_options(backend, WorkspaceOptions::default()).await + } + + /// Create an async workspace with backend and options. 
+ pub async fn with_backend_and_options( + backend: Arc, + options: WorkspaceOptions, + ) -> Result { + let mut inner = WorkspaceInner { + backend, + root: None, + meta_index: HashMap::new(), + catalog: HashMap::new(), + cache: DocumentCache::with_capacity(options.cache_size), + document_graph: None, + }; + + Self::load_meta_index(&mut inner)?; + Self::load_catalog_index(&mut inner)?; + + Ok(Self { + inner: Arc::new(RwLock::new(inner)), + }) + } + + /// Create a new file-based async workspace at the given path. + pub async fn new(path: impl Into) -> Result { + Self::with_options(path, WorkspaceOptions::default()).await + } + + /// Create a new async workspace with custom cache size. + pub async fn with_cache_size(path: impl Into, cache_size: usize) -> Result { + Self::with_options( + path, + WorkspaceOptions { + cache_size, + ..Default::default() + }, + ) + .await + } + + /// Create a new async workspace with custom options. + pub async fn with_options(path: impl Into, options: WorkspaceOptions) -> Result { + let root = path.into(); + let backend = Arc::new(FileBackend::new(&root)?); + + let mut inner = WorkspaceInner { + backend, + root: Some(root), + meta_index: HashMap::new(), + catalog: HashMap::new(), + cache: DocumentCache::with_capacity(options.cache_size), + document_graph: None, + }; + + Self::load_meta_index(&mut inner)?; + Self::load_catalog_index(&mut inner)?; + + Ok(Self { + inner: Arc::new(RwLock::new(inner)), + }) + } + + /// Get the workspace root path (if file-based). + pub async fn path(&self) -> Option { + let inner = self.inner.read().await; + inner.root.clone() + } + + /// List all document IDs in the workspace. + pub async fn list_documents(&self) -> Vec { + let inner = self.inner.read().await; + inner.meta_index.keys().cloned().collect() + } + + /// Get metadata for a document. + pub async fn get_meta(&self, id: &str) -> Option { + let inner = self.inner.read().await; + inner.meta_index.get(id).cloned() + } + + /// Check if a document exists. 
+ pub async fn contains(&self, id: &str) -> bool { + let inner = self.inner.read().await; + inner.meta_index.contains_key(id) + } + + /// Add a document to the workspace. + pub async fn add(&self, doc: &PersistedDocument) -> Result<()> { + let mut inner = self.inner.write().await; + + let doc_id = doc.meta.id.clone(); + let key = Self::doc_key(&doc_id); + + // Serialize and save via backend + let bytes = save_document_to_bytes(doc)?; + inner.backend.put(&key, &bytes)?; + + // Update meta index + let meta_entry = DocumentMetaEntry { + id: doc_id.clone(), + doc_name: doc.meta.name.clone(), + doc_description: doc.meta.description.clone(), + doc_type: doc.meta.format.clone(), + path: doc + .meta + .source_path + .as_ref() + .map(|p| p.to_string_lossy().to_string()), + page_count: if doc.pages.is_empty() { + None + } else { + Some(doc.pages.len()) + }, + line_count: doc.meta.line_count, + }; + + inner.meta_index.insert(doc_id.clone(), meta_entry); + Self::save_meta_index(&inner)?; + + // Update catalog with DocCard + if let Some(card) = doc + .navigation_index + .as_ref() + .and_then(|nav| nav.doc_card().cloned()) + { + inner.catalog.insert(doc_id.clone(), card); + Self::save_catalog_index(&inner)?; + } + + // Remove from cache if present + let _ = inner.cache.remove(&doc_id); + + info!("Saved document {} to async workspace", doc_id); + + // Invalidate document graph since documents changed + inner.document_graph = None; + + Ok(()) + } + + /// Load a document from the workspace. + /// + /// Uses LRU cache: returns cached version if available, + /// otherwise loads from backend and caches it. + pub async fn load(&self, id: &str) -> Result> { + // First check if document exists (read lock) + { + let inner = self.inner.read().await; + if !inner.meta_index.contains_key(id) { + return Ok(None); + } + + // Check LRU cache + if let Some(cached) = inner.cache.get(id)? 
{ + debug!("Cache hit for document {}", id); + return Ok(Some(cached)); + } + } + + // Load from backend (need read lock for backend access) + let inner = self.inner.read().await; + let key = Self::doc_key(id); + + match inner.backend.get(&key)? { + Some(bytes) => { + let doc = load_document_from_bytes(&bytes)?; + + // Note: We can't modify the cache with only a read lock + // For now, we return the document without caching + // A more sophisticated implementation would use a separate cache structure + + debug!("Loaded document {} from backend", id); + Ok(Some(doc)) + } + None => { + warn!("Document {} in meta index but not in backend", id); + Ok(None) + } + } + } + + /// Load a document and cache it (requires write lock for caching). + pub async fn load_and_cache(&self, id: &str) -> Result> { + // First check if document exists (read lock) + { + let inner = self.inner.read().await; + if !inner.meta_index.contains_key(id) { + return Ok(None); + } + + // Check LRU cache + if let Some(cached) = inner.cache.get(id)? { + debug!("Cache hit for document {}", id); + return Ok(Some(cached)); + } + } + + // Load from backend and cache (write lock) + let inner = self.inner.write().await; + let key = Self::doc_key(id); + + match inner.backend.get(&key)? { + Some(bytes) => { + let doc = load_document_from_bytes(&bytes)?; + + // Add to cache + inner.cache.put(id.to_string(), doc.clone())?; + + debug!("Loaded and cached document {}", id); + Ok(Some(doc)) + } + None => { + warn!("Document {} in meta index but not in backend", id); + Ok(None) + } + } + } + + /// Remove a document from the workspace. 
+ pub async fn remove(&self, id: &str) -> Result { + let mut inner = self.inner.write().await; + + if !inner.meta_index.contains_key(id) { + return Ok(false); + } + + let key = Self::doc_key(id); + inner.backend.delete(&key)?; + + inner.meta_index.remove(id); + + // Remove from cache and catalog + let _ = inner.cache.remove(id); + inner.catalog.remove(id); + + Self::save_meta_index(&inner)?; + Self::save_catalog_index(&inner)?; + + info!("Removed document {} from async workspace", id); + + // Invalidate document graph since documents changed + inner.document_graph = None; + + Ok(true) + } + + /// Get the number of documents in the workspace. + pub async fn len(&self) -> usize { + let inner = self.inner.read().await; + inner.meta_index.len() + } + + /// Check if the workspace is empty. + pub async fn is_empty(&self) -> bool { + let inner = self.inner.read().await; + inner.meta_index.is_empty() + } + + /// Find a document ID by its source path. + /// + /// Returns the first document whose `source_path` matches. + /// Used for incremental indexing to check if a file has already been indexed. + pub async fn find_by_source_path(&self, path: &std::path::Path) -> Option { + let target = path.to_string_lossy().to_string(); + let inner = self.inner.read().await; + for (_, entry) in &inner.meta_index { + if entry.path.as_deref() == Some(target.as_str()) { + return Some(entry.id.clone()); + } + } + None + } + + /// Get the number of items currently in the LRU cache. + pub async fn cache_len(&self) -> usize { + let inner = self.inner.read().await; + inner.cache.len() + } + + /// Get cache utilization (0.0 to 1.0). + pub async fn cache_utilization(&self) -> f64 { + let inner = self.inner.read().await; + inner.cache.utilization() + } + + /// Get cache statistics. + pub async fn cache_stats(&self) -> super::cache::CacheStats { + let inner = self.inner.read().await; + inner.cache.stats() + } + + /// Clear the LRU cache. 
+ pub async fn clear_cache(&self) -> Result<()> { + let inner = self.inner.write().await; + inner.cache.clear()?; + debug!("Cleared async document cache"); + Ok(()) + } + + // ========================================================================= + // Document Graph Methods + // ========================================================================= + + /// Storage key for the document graph. + const GRAPH_KEY: &'static str = "_graph"; + + /// Get the document graph, loading from backend if not cached. + pub async fn get_graph(&self) -> Result> { + // Check cache first + { + let inner = self.inner.read().await; + if inner.document_graph.is_some() { + return Ok(inner.document_graph.clone()); + } + } + + // Load from backend + let inner = self.inner.read().await; + match inner.backend.get(Self::GRAPH_KEY)? { + Some(bytes) => { + let graph: vectorless_graph::DocumentGraph = serde_json::from_slice(&bytes) + .map_err(|e| { + vectorless_error::Error::Serialization(format!( + "Failed to deserialize graph: {}", + e + )) + })?; + debug!("Loaded document graph from backend"); + Ok(Some(graph)) + } + None => Ok(None), + } + } + + /// Persist the document graph to the backend. + pub async fn set_graph(&self, graph: &vectorless_graph::DocumentGraph) -> Result<()> { + let mut inner = self.inner.write().await; + let bytes = serde_json::to_vec(graph).map_err(|e| { + vectorless_error::Error::Serialization(format!("Failed to serialize graph: {}", e)) + })?; + inner.backend.put(Self::GRAPH_KEY, &bytes)?; + inner.document_graph = Some(graph.clone()); + info!( + "Persisted document graph ({} nodes, {} edges)", + graph.node_count(), + graph.edge_count() + ); + Ok(()) + } + + /// Invalidate the cached document graph (e.g. after add/remove). 
+ pub async fn invalidate_graph(&self) -> Result<()> { + let mut inner = self.inner.write().await; + inner.document_graph = None; + // Also remove from backend so stale graphs don't persist + let _ = inner.backend.delete(Self::GRAPH_KEY); + debug!("Invalidated document graph cache"); + Ok(()) + } + + /// Get the storage key for a document. + fn doc_key(id: &str) -> String { + id.to_string() + } + + /// Load the meta index from backend. + fn load_meta_index(inner: &mut WorkspaceInner) -> Result<()> { + match inner.backend.get(META_KEY)? { + Some(bytes) => { + let meta: HashMap = serde_json::from_slice(&bytes) + .map_err(|e| Error::Parse(format!("Failed to parse meta index: {}", e)))?; + inner.meta_index = meta; + info!( + "Loaded {} document(s) from async workspace index", + inner.meta_index.len() + ); + } + None => { + // Try to rebuild from existing keys + Self::rebuild_meta_index(inner)?; + } + } + Ok(()) + } + + /// Save the meta index to backend. + fn save_meta_index(inner: &WorkspaceInner) -> Result<()> { + let bytes = serde_json::to_vec_pretty(&inner.meta_index) + .map_err(|e| Error::Parse(format!("Failed to serialize meta index: {}", e)))?; + inner.backend.put(META_KEY, &bytes)?; + Ok(()) + } + + /// Load the DocCard catalog from backend. + fn load_catalog_index(inner: &mut WorkspaceInner) -> Result<()> { + match inner.backend.get(CATALOG_KEY)? { + Some(bytes) => { + let catalog: HashMap = + serde_json::from_slice(&bytes).map_err(|e| { + Error::Parse(format!("Failed to parse catalog index: {}", e)) + })?; + inner.catalog = catalog; + info!("Loaded DocCard catalog: {} entries", inner.catalog.len()); + } + None => { + // Rebuild from existing documents + Self::rebuild_catalog(inner)?; + } + } + Ok(()) + } + + /// Save the DocCard catalog to backend. 
+ fn save_catalog_index(inner: &WorkspaceInner) -> Result<()> { + let bytes = serde_json::to_vec_pretty(&inner.catalog) + .map_err(|e| Error::Parse(format!("Failed to serialize catalog: {}", e)))?; + inner.backend.put(CATALOG_KEY, &bytes)?; + Ok(()) + } + + /// Rebuild the DocCard catalog from existing documents. + fn rebuild_catalog(inner: &mut WorkspaceInner) -> Result<()> { + let keys = inner.backend.keys()?; + let reserved = ["meta", "_graph", "catalog"]; + let doc_keys: Vec<_> = keys + .iter() + .filter(|k| !reserved.contains(&k.as_str())) + .collect(); + + for key in doc_keys { + if let Some(bytes) = inner.backend.get(key)? { + if let Ok(doc) = load_document_from_bytes(&bytes) { + if let Some(card) = doc + .navigation_index + .as_ref() + .and_then(|nav| nav.doc_card().cloned()) + { + inner.catalog.insert(doc.meta.id.clone(), card); + } + } + } + } + + if !inner.catalog.is_empty() { + Self::save_catalog_index(inner)?; + info!("Rebuilt DocCard catalog: {} entries", inner.catalog.len()); + } + + Ok(()) + } + + /// Get all DocCards from the catalog. + pub async fn list_catalog(&self) -> Vec<(String, vectorless_document::DocCard)> { + let inner = self.inner.read().await; + inner + .catalog + .iter() + .map(|(id, card)| (id.clone(), card.clone())) + .collect() + } + + /// Get a single DocCard by document ID. + pub async fn get_doc_card(&self, id: &str) -> Option { + let inner = self.inner.read().await; + inner.catalog.get(id).cloned() + } + + /// Rebuild the meta index from existing documents. + fn rebuild_meta_index(inner: &mut WorkspaceInner) -> Result<()> { + let keys = inner.backend.keys()?; + let reserved = ["meta", "_graph", "catalog"]; + let doc_keys: Vec<_> = keys + .iter() + .filter(|k| !reserved.contains(&k.as_str())) + .collect(); + + for key in doc_keys { + if let Some(bytes) = inner.backend.get(key)? 
{ + if let Ok(doc) = load_document_from_bytes(&bytes) { + let doc_id = doc.meta.id.clone(); + let meta_entry = DocumentMetaEntry { + id: doc_id.clone(), + doc_name: doc.meta.name, + doc_description: doc.meta.description, + doc_type: doc.meta.format, + path: doc + .meta + .source_path + .as_ref() + .map(|p| p.to_string_lossy().to_string()), + page_count: if doc.pages.is_empty() { + None + } else { + Some(doc.pages.len()) + }, + line_count: doc.meta.line_count, + }; + inner.meta_index.insert(doc_id, meta_entry); + } + } + } + + if !inner.meta_index.is_empty() { + Self::save_meta_index(inner)?; + info!( + "Rebuilt async index from {} document(s)", + inner.meta_index.len() + ); + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use vectorless_document::DocumentTree; + + fn create_test_doc(id: &str) -> PersistedDocument { + let meta = super::super::persistence::DocumentMeta::new(id, "Test Doc", "md"); + let tree = DocumentTree::new("Root", "Content"); + PersistedDocument::new(meta, tree) + } +} diff --git a/vectorless/__init__.py b/vectorless/__init__.py index 7b5f67a4..6464e735 100644 --- a/vectorless/__init__.py +++ b/vectorless/__init__.py @@ -5,36 +5,48 @@ from vectorless import Engine engine = Engine(api_key="sk-...", model="gpt-4o") - doc = await engine.ingest("./report.pdf") - answer = await engine.ask("What is the revenue?", doc_ids=[doc.doc_id]) - print(answer.content) + doc = await engine.compile("./report.pdf") + result = await engine.ask("What is the revenue?", doc_ids=[doc.doc_id]) + print(result.answer) """ -# Core Engine and types from Rust +# Primary API — Python Engine wrapping Rust compile + Python strategy +from vectorless.engine import Engine + +# Configuration utilities +from vectorless.config import EngineConfig, load_config, load_config_from_env, load_config_from_file + +# Events +from vectorless.events import EventEmitter + +# Rust types re-exported for convenience from vectorless._vectorless import ( Answer, + 
CollectedEvidence, Concept, Config, DocumentGraph, + DocumentGraphNode, DocumentInfo, EdgeEvidence, - Engine, Evidence, + FindResult, GraphEdge, + LlmMetricsReport, + MatchResult, MetricsReport, + NodeInfo, ReasoningTrace, + RetrievalMetricsReport, + SectionSummary, TraceStep, + TopicEntry, VectorlessError, WeightedKeyword, + WordCount, __version__, ) -# Configuration utilities -from vectorless.config import EngineConfig, load_config, load_config_from_env, load_config_from_file - -# Events -from vectorless.events import EventEmitter - __all__ = [ # Primary API "Engine", @@ -49,6 +61,13 @@ # Document types "DocumentInfo", "Concept", + "NodeInfo", + "MatchResult", + "FindResult", + "WordCount", + "CollectedEvidence", + "TopicEntry", + "SectionSummary", # Answer types "Answer", "Evidence", @@ -56,10 +75,13 @@ "TraceStep", # Graph types "DocumentGraph", + "DocumentGraphNode", "GraphEdge", "EdgeEvidence", "WeightedKeyword", # Metrics + "LlmMetricsReport", + "RetrievalMetricsReport", "MetricsReport", # Error and version "VectorlessError", diff --git a/vectorless/_compat/__init__.py b/vectorless/_compat/__init__.py deleted file mode 100644 index aa38e73c..00000000 --- a/vectorless/_compat/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Framework integrations — optional, loaded on demand.""" - - -def get_langchain_retriever(): - """Get the LangChain VectorlessRetriever class. - - Raises: - ImportError: If langchain-core is not installed. - """ - try: - from vectorless._compat.langchain import VectorlessRetriever - - return VectorlessRetriever - except ImportError: - raise ImportError( - "LangChain integration requires langchain-core. " - "Install with: pip install vectorless[langchain]" - ) - - -def get_llamaindex_retriever(): - """Get the LlamaIndex VectorlessRetriever class. - - Raises: - ImportError: If llama-index-core is not installed. 
- """ - try: - from vectorless._compat.llamaindex import VectorlessRetriever - - return VectorlessRetriever - except ImportError: - raise ImportError( - "LlamaIndex integration requires llama-index-core. " - "Install with: pip install vectorless[llamaindex]" - ) diff --git a/vectorless/_compat/langchain.py b/vectorless/_compat/langchain.py deleted file mode 100644 index 21a6720f..00000000 --- a/vectorless/_compat/langchain.py +++ /dev/null @@ -1,117 +0,0 @@ -"""LangChain BaseRetriever integration for Vectorless.""" - -from __future__ import annotations - -from typing import Any, List, Optional - -from langchain_core.callbacks import CallbackManagerForRetrieverRun -from langchain_core.documents import Document -from langchain_core.retrievers import BaseRetriever - -from vectorless._async_utils import run_async -from vectorless.session import Session - - -class VectorlessRetriever(BaseRetriever): - """LangChain retriever backed by Vectorless. - - Usage:: - - from vectorless._compat import get_langchain_retriever - - VectorlessRetriever = get_langchain_retriever() - - retriever = VectorlessRetriever( - api_key="sk-...", - model="gpt-4o", - endpoint="https://api.openai.com/v1", - doc_ids=["doc-123"], - top_k=3, - ) - - docs = retriever.invoke("What is the revenue?") - - Or with an existing Session (avoids re-initializing the engine):: - - from vectorless import Session - - session = Session(api_key="sk-...", model="gpt-4o") - retriever = VectorlessRetriever(session=session, doc_ids=["doc-123"]) - """ - - api_key: str = "" - model: str = "" - endpoint: str = "" - doc_ids: List[str] = [] - top_k: int = 3 - workspace_scope: bool = False - session: Optional[Session] = None - - class Config: - arbitrary_types_allowed = True - - def _get_session(self) -> Session: - """Get or lazily create a cached Session instance.""" - if self.session is None: - self.session = Session( - api_key=self.api_key or None, - model=self.model or None, - endpoint=self.endpoint or None, - ) - return 
self.session - - def _get_relevant_documents( - self, - query: str, - *, - run_manager: Optional[CallbackManagerForRetrieverRun] = None, - ) -> List[Document]: - """Synchronous retrieval.""" - session = self._get_session() - response = run_async( - session.ask( - query, - doc_ids=self.doc_ids if self.doc_ids else None, - workspace_scope=self.workspace_scope, - ) - ) - return self._to_documents(response) - - async def _aget_relevant_documents( - self, - query: str, - *, - run_manager: Optional[CallbackManagerForRetrieverRun] = None, - ) -> List[Document]: - """Async retrieval.""" - session = self._get_session() - response = await session.ask( - query, - doc_ids=self.doc_ids if self.doc_ids else None, - workspace_scope=self.workspace_scope, - ) - return self._to_documents(response) - - @staticmethod - def _to_documents(response: Any) -> List[Document]: - """Convert Vectorless QueryResponse to LangChain Documents.""" - documents = [] - for item in response.items: - metadata = { - "doc_id": item.doc_id, - "score": item.score, - "confidence": item.confidence, - "node_ids": item.node_ids, - "evidence_count": len(item.evidence), - } - if item.metrics: - metadata["llm_calls"] = item.metrics.llm_calls - metadata["rounds_used"] = item.metrics.rounds_used - metadata["nodes_visited"] = item.metrics.nodes_visited - documents.append( - Document( - page_content=item.content, - metadata=metadata, - ) - ) - return documents diff --git a/vectorless/_compat/llamaindex.py b/vectorless/_compat/llamaindex.py deleted file mode 100644 index 0de96307..00000000 --- a/vectorless/_compat/llamaindex.py +++ /dev/null @@ -1,91 +0,0 @@ -"""LlamaIndex retriever integration for Vectorless.""" - -from __future__ import annotations - -from typing import Any, List, Optional - -from vectorless._async_utils import run_async -from vectorless.session import Session - - -class VectorlessRetriever: - """LlamaIndex-compatible retriever backed by Vectorless. 
- - Usage:: - - from vectorless._compat import get_llamaindex_retriever - - VectorlessRetriever = get_llamaindex_retriever() - - retriever = VectorlessRetriever( - api_key="sk-...", - model="gpt-4o", - endpoint="https://api.openai.com/v1", - doc_ids=["doc-123"], - ) - - nodes = retriever.retrieve("What is the revenue?") - """ - - def __init__( - self, - api_key: str = "", - model: str = "", - endpoint: str = "", - doc_ids: Optional[List[str]] = None, - top_k: int = 3, - workspace_scope: bool = False, - session: Optional[Session] = None, - ) -> None: - if session is not None: - self._session = session - else: - self._session = Session( - api_key=api_key or None, - model=model or None, - endpoint=endpoint or None, - ) - self._doc_ids = doc_ids or [] - self._top_k = top_k - self._workspace_scope = workspace_scope - - def retrieve(self, query: str) -> List[Any]: - """Synchronous retrieval, returns LlamaIndex NodeWithScore objects.""" - response = run_async(self._query(query)) - return self._to_nodes(response) - - async def aretrieve(self, query: str) -> List[Any]: - """Async retrieval, returns LlamaIndex NodeWithScore objects.""" - response = await self._query(query) - return self._to_nodes(response) - - async def _query(self, query: str) -> Any: - return await self._session.ask( - query, - doc_ids=self._doc_ids if self._doc_ids else None, - workspace_scope=self._workspace_scope, - ) - - @staticmethod - def _to_nodes(response: Any) -> List[Any]: - """Convert Vectorless QueryResponse to LlamaIndex NodeWithScore.""" - from llama_index.core.schema import NodeWithScore, TextNode - - nodes = [] - for item in response.items: - metadata = { - "doc_id": item.doc_id, - "confidence": item.confidence, - "node_ids": item.node_ids, - } - text_node = TextNode( - text=item.content, - metadata=metadata, - ) - nodes.append( - NodeWithScore( - node=text_node, - score=item.score, - ) - ) - return nodes diff --git a/vectorless/_internal/__init__.py b/vectorless/_internal/__init__.py new 
file mode 100644 index 00000000..f37614ad --- /dev/null +++ b/vectorless/_internal/__init__.py @@ -0,0 +1 @@ +"""Internal utilities — NOT part of the public API.""" diff --git a/vectorless/_async_utils.py b/vectorless/_internal/_async_utils.py similarity index 100% rename from vectorless/_async_utils.py rename to vectorless/_internal/_async_utils.py diff --git a/vectorless/_core.py b/vectorless/_internal/_core.py similarity index 62% rename from vectorless/_core.py rename to vectorless/_internal/_core.py index 53ea7ce5..8701aa3c 100644 --- a/vectorless/_core.py +++ b/vectorless/_internal/_core.py @@ -1,48 +1,62 @@ """Internal re-exports from the Rust PyO3 module. -This module is NOT part of the public API. Use ``vectorless.Engine`` instead. +This module is NOT part of the public API. +The public Engine is ``vectorless.engine.Engine`` (Python strategy layer). +Here ``Engine`` refers to the raw Rust engine used internally for compile/document management. """ from vectorless._vectorless import ( Answer, + CollectedEvidence, Concept, Config, DocumentGraph, - DocumentGraphEdge, DocumentGraphNode, DocumentInfo, EdgeEvidence, Engine, Evidence, + FindResult, GraphEdge, LlmMetricsReport, + MatchResult, MetricsReport, + NodeInfo, ReasoningTrace, RetrievalMetricsReport, + SectionSummary, TraceStep, + TopicEntry, VectorlessError, WeightedKeyword, + WordCount, __version__, ) __all__ = [ "Answer", + "CollectedEvidence", "Concept", "Config", "DocumentGraph", - "DocumentGraphEdge", "DocumentGraphNode", "DocumentInfo", "EdgeEvidence", "Engine", "Evidence", + "FindResult", "GraphEdge", "LlmMetricsReport", + "MatchResult", "MetricsReport", + "NodeInfo", "ReasoningTrace", "RetrievalMetricsReport", + "SectionSummary", "TraceStep", + "TopicEntry", "VectorlessError", "WeightedKeyword", + "WordCount", "__version__", ] diff --git a/vectorless/ask/__init__.py b/vectorless/ask/__init__.py new file mode 100644 index 00000000..05096dc6 --- /dev/null +++ b/vectorless/ask/__init__.py @@ -0,0 
+1,56 @@ +"""Ask pipeline — query understanding, multi-agent retrieval, and answer synthesis.""" + +from vectorless.ask.dispatcher import dispatch +from vectorless.ask.evaluate import evaluate +from vectorless.ask.orchestrator import Orchestrator +from vectorless.ask.plan import Complexity, QueryIntent, QueryPlan, SubQuery +from vectorless.ask.types import ( + DispatchEntry, + DocCard, + EvalResult, + Evidence, + Metrics, + OrchestratorState, + Output, + Scope, + Specified, + TraceStep, + WorkerMetrics, + WorkerOutput, + WorkerState, + Workspace, +) +from vectorless.ask.understand import understand +from vectorless.ask.worker import Worker + +__all__ = [ + # Core output types + "Output", + "Evidence", + "Metrics", + "TraceStep", + # Worker types + "WorkerOutput", + "WorkerMetrics", + "WorkerState", + # Orchestrator types + "Orchestrator", + "OrchestratorState", + "DispatchEntry", + "DocCard", + "EvalResult", + # Scope types + "Scope", + "Specified", + "Workspace", + # Query understanding + "QueryIntent", + "QueryPlan", + "SubQuery", + "Complexity", + # Agents + "Worker", + "dispatch", + "evaluate", + "understand", +] diff --git a/vectorless/ask/dispatcher.py b/vectorless/ask/dispatcher.py new file mode 100644 index 00000000..486b598d --- /dev/null +++ b/vectorless/ask/dispatcher.py @@ -0,0 +1,61 @@ +"""Dispatcher — unified entry point for retrieval. + +Mirrors vectorless-core/vectorless-agent/src/dispatcher.rs. + +All queries go through dispatch(): +1. Query understanding -> QueryPlan +2. Scope resolution -> Specified | Workspace +3. Orchestrator.run() (always) +4. 
Return Output +""" + +from __future__ import annotations + +import logging +from typing import Any, Callable + +from vectorless.ask.types import DocCard, Output, Specified, Workspace +from vectorless.ask.orchestrator import Orchestrator +from vectorless.ask.understand import understand +from vectorless.llm_client import LLMClient + +logger = logging.getLogger(__name__) + + +async def dispatch( + query: str, + scope: Specified | Workspace, + llm: LLMClient, + doc_loader: Callable[[str], Any], + event_callback: Any = None, +) -> Output: + """Unified entry point — mirrors Rust dispatcher::dispatch(). + + All queries go through Orchestrator: + - Specified -> skip_analysis=True -> spawn Workers directly + - Workspace -> skip_analysis=False -> analyze -> dispatch -> evaluate -> replan + """ + # Step 1: Query understanding + logger.info("dispatch: query understanding started") + query_plan = await understand(query, llm) + logger.info( + "dispatch: query understanding complete (intent=%s, complexity=%s)", + query_plan.intent.value, + query_plan.complexity.value, + ) + + # Step 2: Determine skip_analysis from scope + skip_analysis = isinstance(scope, Specified) + doc_cards = scope.docs + + # Step 3: Orchestrator (always) + orchestrator = Orchestrator( + query=query, + doc_cards=doc_cards, + doc_loader=doc_loader, + llm_client=llm, + skip_analysis=skip_analysis, + query_plan=query_plan, + event_callback=event_callback, + ) + return await orchestrator.run() diff --git a/vectorless/ask/evaluate.py b/vectorless/ask/evaluate.py new file mode 100644 index 00000000..e1f8faaa --- /dev/null +++ b/vectorless/ask/evaluate.py @@ -0,0 +1,220 @@ +"""Evaluate cross-document evidence sufficiency via LLM. + +Mirrors vectorless-agent/src/orchestrator/evaluate.rs. + +Two evaluation modes: +1. Cross-doc evaluation (Orchestrator level) — simple SUFFICIENT/INSUFFICIENT text parse +2. 
Structured evaluation (Worker level) — JSON with coverage/quality/missing_aspects + +Both use the same Evidence type from types.py. +""" + +from __future__ import annotations + +import json +import logging +import re + +from vectorless.ask.types import EvalResult, Evidence +from vectorless.llm_client import LLMClient + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Cross-doc evaluation prompt — mirrors Rust orchestrator/evaluate.rs +# --------------------------------------------------------------------------- + +def _build_cross_eval_prompt(query: str, evidence: list[Evidence]) -> tuple[str, str]: + """Build (system, user) prompt for cross-document sufficiency evaluation. + + Uses the same SUFFICIENT/INSUFFICIENT text format as Rust — simple and robust. + """ + evidence_summary = _format_evidence_summary(evidence) + system = ( + "You evaluate whether collected evidence contains information that can answer or " + "relate to the user's question. The evidence is raw document text — it does not need to be " + "a complete or perfect answer. If the evidence mentions or addresses the key concepts from " + "the question, it is sufficient.\n" + "\n" + "Respond with ONLY 'SUFFICIENT' or 'INSUFFICIENT' followed by a one-line reason.\n" + "\n" + "Guidelines:\n" + "- If the evidence text contains any information directly related to the question's key terms, " + "respond SUFFICIENT.\n" + "- If the evidence is completely unrelated or empty, respond INSUFFICIENT.\n" + "- Default to SUFFICIENT unless the evidence is clearly irrelevant." + ) + user = ( + f"Question: {query}\n\n" + f"Collected evidence:\n" + f"{evidence_summary}\n\n" + f"Is this sufficient?" + ) + return system, user + + +def _format_evidence_summary(evidence: list[Evidence]) -> str: + """Format evidence with source attribution for evaluation. + + Mirrors Rust format_evidence_summary — includes doc_name for cross-doc context. 
+ """ + if not evidence: + return "(no evidence)" + return "\n\n".join( + f"[{e.node_title}] (from {e.doc_name or 'unknown'})\n{e.content}" + for e in evidence + ) + + +# --------------------------------------------------------------------------- +# Parse cross-doc evaluation response — mirrors Rust parse_sufficiency_response +# --------------------------------------------------------------------------- + +def _parse_sufficiency_response(response: str) -> bool: + """Parse the sufficiency check response. Returns True if SUFFICIENT.""" + upper = response.strip().upper() + return upper.startswith("SUFFICIENT") and not upper.startswith("INSUFFICIENT") + + +def _extract_missing_info(response: str) -> str: + """Extract missing info description from an INSUFFICIENT response.""" + reason = response.strip() + for prefix in ("INSUFFICIENT", "Insufficient"): + if reason.startswith(prefix): + reason = reason[len(prefix):] + break + reason = reason.lstrip("-: ") + return reason if reason else "Evidence does not fully address the query." + + +# --------------------------------------------------------------------------- +# Structured evaluation prompt (for detailed analysis) +# --------------------------------------------------------------------------- + +_EVAL_SYSTEM = """\ +You evaluate whether collected evidence can answer the user's question. 
+ +Analyze the evidence and respond with a JSON object: +{ + "sufficient": true/false, + "coverage": 0.0-1.0, + "quality": 0.0-1.0, + "missing_aspects": ["aspect 1", "aspect 2"], + "relevant_ids": ["node_title_1", "node_title_2"] +} + +Guidelines: +- "sufficient": true if evidence addresses ALL key aspects of the question +- "coverage": fraction of the question's key aspects that the evidence addresses +- "quality": average relevance of the evidence items (1.0 = all directly relevant, 0.0 = all irrelevant) +- "missing_aspects": specific topics/angles the question asks about but evidence does not cover +- "relevant_ids": node_titles of evidence items that are actually relevant (not tangential) + +Be strict: if any major aspect of the question is unaddressed, mark sufficient=false. +Be generous on coverage: if evidence partially addresses an aspect, count it as 0.5 coverage for that aspect. +""" + + +def _build_structured_eval_prompt(query: str, evidence: list[Evidence]) -> tuple[str, str]: + """Build (system, user) prompt for structured evaluation.""" + evidence_text = _format_evidence_summary(evidence) + user = ( + f"Question: {query}\n\n" + f"Evidence items:\n{evidence_text}\n\n" + f"Evaluate the evidence." 
+ ) + return _EVAL_SYSTEM, user + + +def _parse_structured_response(response: str) -> EvalResult: + """Parse LLM JSON response into EvalResult.""" + try: + data = json.loads(response) + except json.JSONDecodeError: + match = re.search(r"\{.*\}", response, re.DOTALL) + if match: + data = json.loads(match.group()) + else: + logger.warning("Failed to parse eval response as JSON, falling back to text") + return _parse_text_fallback(response) + + sufficient = bool(data.get("sufficient", False)) + coverage = float(data.get("coverage", 0.5)) + quality = float(data.get("quality", 0.5)) + missing = [str(a) for a in data.get("missing_aspects", [])] + relevant_ids = [str(i) for i in data.get("relevant_ids", [])] + + coverage = max(0.0, min(1.0, coverage)) + quality = max(0.0, min(1.0, quality)) + + missing_info = "; ".join(missing) if missing else "" + + return EvalResult( + sufficient=sufficient, + missing_info=missing_info, + coverage=coverage, + quality_score=quality, + missing_aspects=missing, + relevant_evidence_ids=relevant_ids, + ) + + +def _parse_text_fallback(response: str) -> EvalResult: + """Fallback parsing when JSON parsing fails.""" + text = response.strip().upper() + sufficient = text.startswith("SUFFICIENT") + + missing_info = "" + missing_aspects: list[str] = [] + if not sufficient: + missing_info = _extract_missing_info(response) + missing_aspects = [missing_info] if missing_info else [] + + return EvalResult( + sufficient=sufficient, + missing_info=missing_info, + coverage=0.5 if sufficient else 0.3, + quality_score=0.5, + missing_aspects=missing_aspects, + ) + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +async def evaluate( + llm: LLMClient, + query: str, + evidence: list[Evidence], + *, + structured: bool = False, +) -> EvalResult: + """Evaluate whether collected evidence is sufficient to answer the query. 
+ + Two modes: + - structured=False (default): cross-doc SUFFICIENT/INSUFFICIENT — mirrors Rust + - structured=True: JSON with coverage/quality scores + + Propagates LLM errors — no fallback. + """ + if structured: + system, user = _build_structured_eval_prompt(query, evidence) + response = await llm.complete(system, user) + return _parse_structured_response(response) + + # Cross-doc evaluation (mirrors Rust orchestrator/evaluate.rs) + system, user = _build_cross_eval_prompt(query, evidence) + response = await llm.complete(system, user) + + sufficient = _parse_sufficiency_response(response) + missing_info = "" if sufficient else _extract_missing_info(response) + + return EvalResult( + sufficient=sufficient, + missing_info=missing_info, + coverage=0.7 if sufficient else 0.3, + quality_score=0.7 if sufficient else 0.3, + missing_aspects=[missing_info] if missing_info else [], + ) diff --git a/vectorless/ask/orchestrator.py b/vectorless/ask/orchestrator.py new file mode 100644 index 00000000..668b6fd3 --- /dev/null +++ b/vectorless/ask/orchestrator.py @@ -0,0 +1,537 @@ +"""Orchestrator agent — coordinates multi-document retrieval. + +Mirrors vectorless-core/vectorless-agent/src/orchestrator/. + +The Orchestrator is always the entry point for retrieval: +- User specified doc_ids → skip_analysis=True → spawn Workers directly +- Workspace (unspecified) → analyze DocCards → select docs → spawn Workers + +Both paths produce the same Output type and share the same finalize logic. 
+ +Flow: + Orchestrator.run() + Phase 1: analyze() → AnalyzeOutcome (dispatches or early return) + Phase 2: supervisor loop → dispatch Workers → evaluate → replan + Phase 3: finalize_output() → rerank → Output +""" + +from __future__ import annotations + +import asyncio +import logging +import re +from dataclasses import dataclass +from typing import Any + +from vectorless.ask.types import ( + DispatchEntry, + DocCard, + Evidence, + EvalResult, + Metrics, + OrchestratorState, + Output, + WorkerOutput, +) +from vectorless.ask.evaluate import evaluate +from vectorless.ask.worker import Worker +from vectorless.llm_client import LLMClient +from vectorless.ask.plan import QueryPlan +from vectorless.ask.prompts import ( + OrchestratorAnalysisParams, + orchestrator_analysis, + orchestrator_replan_prompt, + parse_dispatch_plan, + parse_replan_response, +) +from vectorless.rerank.synthesize import RerankOutput, process as rerank_process + +logger = logging.getLogger(__name__) + +MAX_SUPERVISOR_ITERATIONS = 3 + + +# --------------------------------------------------------------------------- +# Analyze outcome — mirrors Rust AnalyzeOutcome +# --------------------------------------------------------------------------- + +@dataclass +class _AnalyzeOutcome: + """Result of the analyze phase.""" + dispatches: list[DispatchEntry] + llm_calls: int + + +# --------------------------------------------------------------------------- +# Supervisor outcome — mirrors Rust SupervisorOutcome +# --------------------------------------------------------------------------- + +@dataclass +class _SupervisorOutcome: + """Outcome of the supervisor loop.""" + iteration: int + eval_sufficient: bool + llm_calls: int + + +# --------------------------------------------------------------------------- +# Orchestrator — mirrors Rust orchestrator/mod.rs +# --------------------------------------------------------------------------- + +class Orchestrator: + """Coordinates multi-document retrieval with Workers. 
+ + Holds all execution context. Calling run() produces an Output. + + Usage:: + + orch = Orchestrator( + query="Compare revenue across years", + doc_cards=[card1, card2], + doc_loader=load_fn, + llm_client=llm, + query_plan=plan, + ) + output = await orch.run() + """ + + def __init__( + self, + query: str, + doc_cards: list[DocCard], + doc_loader: Any, # async callable: (doc_id: str) -> PyDocument + llm_client: LLMClient, + *, + skip_analysis: bool = False, + query_plan: QueryPlan | None = None, + max_rounds: int = 15, + max_llm_calls: int = 0, + event_callback: Any = None, # async callable: (dict) -> None + ) -> None: + self._query = query + self._doc_cards = doc_cards + self._doc_loader = doc_loader + self._llm = llm_client + self._skip_analysis = skip_analysis + self._query_plan = query_plan + self._max_rounds = max_rounds + self._max_llm_calls = max_llm_calls + self._emit = event_callback or _noop_emit + + async def run(self) -> Output: + """Execute the Orchestrator: analyze → supervisor loop → finalize. + + Returns the final Output with answer, evidence, metrics, and trace. 
+ """ + query = self._query + cards = self._doc_cards + llm = self._llm + state = OrchestratorState() + + intent_context = "" + if self._query_plan: + intent_context = self._query_plan.intent_context() + + logger.info( + "Orchestrator starting (docs=%d, skip_analysis=%s)", + len(cards), self._skip_analysis, + ) + + # --- Phase 1: Analyze --- + analyze_result = await self._analyze( + query, cards, llm, state, self._skip_analysis, intent_context, + ) + + if analyze_result is None: + # No results or already answered + return state.into_output("") + + state.total_llm_calls += analyze_result.llm_calls + initial_dispatches = analyze_result.dispatches + + # --- Phase 2: Supervisor loop --- + outcome = await self._supervisor_loop( + query, initial_dispatches, cards, llm, state, + ) + state.total_llm_calls += outcome.llm_calls + + confidence = _compute_confidence( + eval_sufficient=outcome.eval_sufficient, + replan_rounds=outcome.iteration, + no_evidence=not state.all_evidence, + ) + + # --- Phase 3: Finalize — rerank + assemble Output --- + if state.all_evidence: + return await self._finalize_output( + state, + self._query_plan.intent if self._query_plan else None, + confidence, + ) + + logger.info("No evidence collected — returning empty output") + return state.into_output("") + + # ----------------------------------------------------------------------- + # Phase 1: Analyze — mirrors Rust orchestrator/analyze.rs + # ----------------------------------------------------------------------- + + async def _analyze( + self, + query: str, + cards: list[DocCard], + llm: LLMClient, + state: OrchestratorState, + skip_analysis: bool, + intent_context: str, + ) -> _AnalyzeOutcome | None: + """Analyze documents and produce a dispatch plan. + + Returns None if no results / already answered, or _AnalyzeOutcome. 
+ """ + if skip_analysis: + logger.debug("Phase 1: skipping (user-specified documents)") + return _AnalyzeOutcome( + dispatches=[ + DispatchEntry( + doc_idx=i, + reason="User-specified document", + task=query, + ) + for i in range(len(cards)) + ], + llm_calls=0, + ) + + # Build doc cards text + doc_cards_text = _format_doc_cards(cards) + + # Cross-document keyword search + keywords = _extract_keywords(query) + find_text = await self._cross_doc_find(cards, keywords) + + # Build analysis prompt with query understanding context + system, user = orchestrator_analysis(OrchestratorAnalysisParams( + query=query, + doc_cards=doc_cards_text, + find_results=find_text, + intent_context=intent_context, + )) + + try: + analysis_output = await llm.complete(system, user) + except Exception as e: + logger.error("Orchestrator analysis LLM call failed: %s", e) + return None + + logger.info( + "Phase 1: analysis complete (response_len=%d)", len(analysis_output), + ) + + dispatches = parse_dispatch_plan(analysis_output, len(cards)) + + if dispatches is None: + logger.info("Analysis indicates already answered") + return None + + if not dispatches: + logger.info("No relevant documents found") + return None + + state.analyze_done = True + return _AnalyzeOutcome(dispatches=dispatches, llm_calls=1) + + # ----------------------------------------------------------------------- + # Cross-document search — mirrors Rust orchestrator cross-doc find + # ----------------------------------------------------------------------- + + async def _cross_doc_find( + self, + cards: list[DocCard], + keywords: list[str], + ) -> str: + """Search across documents for keywords using navigation index.""" + results: list[str] = [] + for card in cards: + try: + doc = await self._doc_loader(card.doc_id) + except Exception: + continue + for kw in keywords[:5]: + try: + hits = await doc.find(kw) + if hits: + for hit in hits[:3]: + results.append( + f"[{card.name}] '{kw}' → {hit.title} " + f"(depth {hit.depth}, 
{hit.leaf_count} leaves)" + ) + except Exception: + pass + return "\n".join(results) if results else "(no cross-document matches)" + + # ----------------------------------------------------------------------- + # Phase 2: Supervisor loop — mirrors Rust orchestrator/supervisor.rs + # ----------------------------------------------------------------------- + + async def _supervisor_loop( + self, + query: str, + initial_dispatches: list[DispatchEntry], + cards: list[DocCard], + llm: LLMClient, + state: OrchestratorState, + ) -> _SupervisorOutcome: + """Run: dispatch → evaluate → replan loop.""" + current_dispatches = initial_dispatches + iteration = 0 + eval_sufficient = False + llm_calls = 0 + + while iteration < MAX_SUPERVISOR_ITERATIONS: + # Dispatch current plan + if current_dispatches: + logger.info( + "Dispatching %d Workers (iteration=%d)", + len(current_dispatches), iteration, + ) + await self._dispatch_and_collect( + query, current_dispatches, cards, llm, state, + ) + + # No evidence — nothing to evaluate + if not state.all_evidence: + logger.info("No evidence collected from any Worker") + break + + # Skip evaluation for user-specified documents (no replan needed) + if self._skip_analysis: + eval_sufficient = bool(state.all_evidence) + break + + # Evaluate sufficiency + try: + eval_result = await evaluate(llm, query, state.all_evidence) + except Exception as e: + logger.error("Cross-doc evaluation failed: %s", e) + break + llm_calls += 1 + + if eval_result.sufficient: + eval_sufficient = True + logger.info( + "Evidence sufficient (evidence=%d, iteration=%d)", + len(state.all_evidence), iteration, + ) + break + + # Insufficient — replan + logger.info( + "Evidence insufficient (evidence=%d, iteration=%d) — replanning", + len(state.all_evidence), iteration, + ) + + try: + new_dispatches = await self._replan( + query, eval_result.missing_info, state, cards, llm, + ) + except Exception as e: + logger.error("Replan failed: %s", e) + break + + if not new_dispatches: 
+ logger.info("Replan produced no new dispatches — exiting") + break + + current_dispatches = new_dispatches + iteration += 1 + + return _SupervisorOutcome( + iteration=iteration, + eval_sufficient=eval_sufficient, + llm_calls=llm_calls, + ) + + # ----------------------------------------------------------------------- + # Dispatch and collect — mirrors Rust orchestrator/dispatch.rs + # ----------------------------------------------------------------------- + + async def _dispatch_and_collect( + self, + query: str, + dispatches: list[DispatchEntry], + cards: list[DocCard], + llm: LLMClient, + state: OrchestratorState, + ) -> None: + """Dispatch Workers in parallel and collect results.""" + intent_context = "" + if self._query_plan: + intent_context = f"{self._query_plan.intent.value} — {self._query_plan.strategy_hint}" + + async def run_worker(dispatch: DispatchEntry) -> tuple[int, WorkerOutput]: + idx = dispatch.doc_idx + if idx >= len(cards): + logger.warning("Document index %d out of range, skipping", idx) + return (idx, WorkerOutput()) + + card = cards[idx] + + try: + doc = await self._doc_loader(card.doc_id) + except Exception as e: + logger.warning("Failed to load document %s: %s", card.doc_id, e) + return (idx, WorkerOutput()) + + worker = Worker( + document=doc, + query=query, + llm_client=llm, + max_rounds=self._max_rounds, + max_llm_calls=self._max_llm_calls, + task=dispatch.task, + intent_context=intent_context, + ) + + result = await worker.run() + logger.info( + "Worker completed for doc %d (%s): evidence=%d, rounds=%d", + idx, card.name, len(result.evidence), result.metrics.rounds_used, + ) + return (idx, result) + + tasks = [run_worker(d) for d in dispatches] + results = await asyncio.gather(*tasks, return_exceptions=True) + + for item in results: + if isinstance(item, Exception): + logger.warning("Worker failed: %s", item) + continue + idx, output = item + state.collect_result(idx, output) + + # 
----------------------------------------------------------------------- + # Replan — mirrors Rust orchestrator/replan.rs + # ----------------------------------------------------------------------- + + async def _replan( + self, + query: str, + missing_info: str, + state: OrchestratorState, + cards: list[DocCard], + llm: LLMClient, + ) -> list[DispatchEntry]: + """Replan dispatch targets based on missing information.""" + evidence_summary = _format_evidence_context(state.all_evidence) + doc_cards_text = _format_doc_cards(cards) + + system, user = orchestrator_replan_prompt( + query=query, + missing_info=missing_info, + evidence_summary=evidence_summary, + dispatched_indices=state.dispatched, + doc_cards=doc_cards_text, + ) + + try: + response = await llm.complete(system, user) + except Exception as e: + logger.error("Replan LLM call failed: %s", e) + return [] + + state.total_llm_calls += 1 + return parse_replan_response(response, len(cards), state.dispatched) + + # ----------------------------------------------------------------------- + # Finalize — mirrors Rust orchestrator::finalize_output + # ----------------------------------------------------------------------- + + async def _finalize_output( + self, + state: OrchestratorState, + intent: Any, # QueryIntent or None + confidence: float, + ) -> Output: + """Rerank evidence and assemble the final Output.""" + from vectorless.ask.plan import QueryIntent + + effective_intent = intent or QueryIntent.FACTUAL + + reranked = rerank_process( + evidence=state.all_evidence, + intent=effective_intent, + confidence=confidence, + ) + + state.total_llm_calls += reranked.llm_calls + + output = state.into_output(reranked.answer) + output.confidence = reranked.confidence + + logger.info( + "Orchestrator complete (evidence=%d, llm_calls=%d, confidence=%.2f)", + len(output.evidence), output.metrics.llm_calls, output.confidence, + ) + + return output + + +# --------------------------------------------------------------------------- 
+# Helpers +# --------------------------------------------------------------------------- + +def _noop_emit(event: dict) -> Any: + """No-op event emitter.""" + return asyncio.ensure_future(asyncio.sleep(0)) + + +def _compute_confidence( + eval_sufficient: bool, + replan_rounds: int, + no_evidence: bool, +) -> float: + """Compute confidence from LLM evaluate() outcome. + + Mirrors Rust compute_confidence. + """ + if no_evidence: + return 0.0 + if eval_sufficient: + return max(0.5, 0.95 - replan_rounds * 0.15) + return max(0.1, 0.4 - replan_rounds * 0.1) + + +def _extract_keywords(query: str) -> list[str]: + """Extract simple keywords from a query.""" + stop_words = { + "what", "is", "the", "a", "an", "how", "does", "do", "are", + "in", "on", "at", "to", "for", "of", "with", "and", "or", + "this", "that", "it", "from", "by", "was", "were", "be", + "can", "could", "would", "should", "will", "has", "have", + "had", "not", "but", "if", "then", "than", "so", "as", + } + words = re.findall(r"\b\w+\b", query.lower()) + return [w for w in words if w not in stop_words and len(w) > 2] + + +def _format_doc_cards(cards: list[DocCard]) -> str: + """Format document cards for the analysis prompt.""" + lines = [] + for i, card in enumerate(cards, 1): + concepts = f" (concepts: {', '.join(card.concepts[:5])})" if card.concepts else "" + lines.append( + f"[{i}] {card.name} — {card.summary} " + f"({card.section_count} sections){concepts}" + ) + return "\n".join(lines) + + +def _format_evidence_context(evidence: list[Evidence]) -> str: + """Format collected evidence for the replan prompt.""" + if not evidence: + return "(no evidence collected)" + return "\n\n".join( + f"[{e.node_title}] (from {e.doc_name or 'unknown'})\n{e.content}" + for e in evidence + ) diff --git a/vectorless/ask/plan.py b/vectorless/ask/plan.py new file mode 100644 index 00000000..de5dc47f --- /dev/null +++ b/vectorless/ask/plan.py @@ -0,0 +1,56 @@ +"""Query plan types — mirrors vectorless-query types.""" + +from 
__future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum + + +class QueryIntent(str, Enum): + """Detected intent of a user query.""" + FACTUAL = "factual" + ANALYTICAL = "analytical" + NAVIGATIONAL = "navigational" + SUMMARY = "summary" + + +class Complexity(str, Enum): + """Estimated query complexity.""" + SIMPLE = "simple" + MODERATE = "moderate" + COMPLEX = "complex" + + +@dataclass +class SubQuery: + """A decomposed sub-query from a complex query.""" + query: str + intent: QueryIntent = QueryIntent.FACTUAL + target_docs: list[str] | None = None + + +@dataclass +class QueryPlan: + """Structured analysis of a user query. + + Produced by query understanding, consumed by Orchestrator and Workers. + """ + original: str + intent: QueryIntent = QueryIntent.FACTUAL + keywords: list[str] = field(default_factory=list) + key_concepts: list[str] = field(default_factory=list) + strategy_hint: str = "" + complexity: Complexity = Complexity.SIMPLE + rewritten: list[str] = field(default_factory=list) + sub_queries: list[SubQuery] = field(default_factory=list) + + def intent_context(self) -> str: + """Format intent context string for prompts.""" + parts = [f"Query intent: {self.intent.value} (complexity: {self.complexity.value})"] + if self.key_concepts: + parts.append(f"Key concepts: {', '.join(self.key_concepts)}") + if self.strategy_hint: + parts.append(f"Retrieval strategy: {self.strategy_hint}") + if self.rewritten: + parts.append(f"Rewritten queries for matching: {'; '.join(self.rewritten)}") + return "\n" + "\n".join(parts) diff --git a/vectorless/ask/prompts.py b/vectorless/ask/prompts.py new file mode 100644 index 00000000..3203c1aa --- /dev/null +++ b/vectorless/ask/prompts.py @@ -0,0 +1,558 @@ +"""Prompt templates for the retrieval agent. + +Extracted from vectorless-core/vectorless-agent/src/prompts.rs, +vectorless-core/vectorless-agent/src/worker/planning.rs, and +vectorless-core/vectorless-agent/src/orchestrator/replan.rs. 
+ +Templates: +1. worker_navigation — Worker nav loop, every round +2. worker_dispatch — Worker first round (when Orchestrator dispatches) +3. orchestrator_analysis — Orchestrator Phase 1 +4. check_sufficiency — evidence sufficiency evaluation +5. build_plan_prompt — Phase 1.5 navigation planning +6. build_replan_prompt — Worker re-plan after insufficient check +7. orchestrator_replan — Orchestrator re-dispatch after insufficient evidence +""" + +from __future__ import annotations + +from dataclasses import dataclass + +from vectorless.ask.types import DispatchEntry + + +# --------------------------------------------------------------------------- +# 1. Worker Navigation (used every round in the nav loop) +# --------------------------------------------------------------------------- + +@dataclass +class NavigationParams: + query: str + task: str | None = None + breadcrumb: str = "root" + evidence_summary: str = "(none)" + missing_info: str = "" + last_feedback: str = "" + remaining: int = 15 + max_rounds: int = 15 + history: str = "(no history yet)" + visited_titles: str = "(none)" + plan: str = "" + intent_context: str = "" + keyword_hints: str = "" + + +_WORKER_NAVIGATION_SYSTEM = """\ +You are a document navigation assistant. You navigate inside a document to find \ +information that answers the user's question. + +Available commands: +- ls List children at current position (with summaries and leaf counts) +- cd Enter a child node (supports relative paths like Section/Sub and absolute paths like /root/Section) +- cd .. 
Go back to parent node +- back Go back to previously visited position +- cat Read a child node's content (automatically collected as evidence) +- cat Read the current node's content (useful at leaf nodes) +- head Preview first 20 lines of a node (does NOT collect evidence) +- find Search for a keyword in the document index (also supports multi-word like 'Lab C') +- findtree Search for nodes by title pattern (case-insensitive) +- grep Regex search across all content in current subtree +- toc [depth] Show table of contents (optional max depth filter) +- stats Show node statistics (size, depth, children, leaf status) +- grep_node Regex search within a specific node +- similar Find similar nodes by keyword overlap +- overview Show pre-computed section overview/summary +- siblings List sibling nodes at the same level +- ancestors Show path from root to the given node +- doc_card Show document-level overview (title, summary, sections) +- concepts List key concepts extracted from the document +- find_section Find a section by exact title (case-insensitive) +- compare <a> <b> Compare two nodes using LLM analysis (use node IDs) +- trace <name> Trace reasoning chain from a node using LLM +- summarize <name> Generate dynamic LLM summary of a node +- wc <name> Show content size (lines, words, chars) +- pwd Show current navigation path +- check Evaluate if collected evidence is sufficient +- done End navigation + +SEARCH STRATEGY (important — follow this priority order): +- When keyword matches are shown below, navigate directly to the highest-weight matched node. \ +Do NOT explore other branches first — the keyword index has already identified the most relevant location. +- When find results include content snippets that answer the question, cd to that node and cat it immediately. +- Use find with the EXACT keyword from the list (single word, \ +not multi-word phrases). 
Example: if hint shows keyword 'performance' pointing to Performance section, \ +use find performance, NOT find "performance guide". +- Use ls only when you have no keyword hints or need to discover the structure of an unknown section. +- Use findtree when you know a section title pattern but not the exact name. + +NAVIGATION EFFICIENCY (critical — every round counts): +- Prefer cd with absolute paths (/root/Section/Subsection) or relative paths (Section/Sub) \ +to reach target nodes in ONE command instead of multiple cd steps. +- Do NOT ls before cd if keyword hints or find results already tell you which node to enter. +- Do NOT cd into nodes one level at a time when you can use a multi-segment path. + +Rules: +- Output exactly ONE command per response, nothing else. +- Content from cat is automatically saved as evidence — don't re-cat the same node. +- Do not cat or cd into nodes you have already visited. +- If the current branch has nothing relevant, use cd .. to go back. +- If you're at the root and no children seem relevant, use done. + +STOPPING RULES (critical — follow these strictly): +- After cat collects evidence, immediately check: does the collected text contain information \ + that answers or relates to the user's question? If YES, output done. Do NOT continue searching. +- Do NOT run grep after cat — cat already collected the full content. grep is for locating \ + content BEFORE cat, not after. +- If ls shows '(no navigation data)' or no children, you are at a leaf node. Use cat to read it \ + or cd .. to go back. Do NOT ls again. 
+- When remaining rounds are low (≤2), prefer done over exploring new branches.""" + + +def worker_navigation(params: NavigationParams) -> tuple[str, str]: + """Build (system, user) prompt pair for a navigation round.""" + query = params.query + breadcrumb = params.breadcrumb + evidence_summary = params.evidence_summary + remaining = params.remaining + max_rounds = params.max_rounds + + task_section = ( + f"\nYour specific task: {params.task}\n(This is a sub-task for the original query.)" + if params.task + else "" + ) + + missing_section = ( + f"\nPotentially missing info: {params.missing_info}" + if params.missing_info + else "" + ) + + last_feedback_section = ( + f"\nLast command result:\n{params.last_feedback}\n" + if params.last_feedback + else "" + ) + + history_section = ( + "" + if params.history == "(no history yet)" + else f"\nPrevious rounds:\n{params.history}\n" + ) + + visited_section = ( + "" + if params.visited_titles == "(none)" + else f"\nAlready visited (do not re-read these): {params.visited_titles}" + ) + + plan_section = ( + "" + if not params.plan + else f"\nNavigation plan (follow this as guidance, adapt if needed):\n{params.plan}\n" + ) + + keyword_section = f"\n{params.keyword_hints}" if params.keyword_hints else "" + + intent_section = ( + f"\nQuery context: {params.intent_context}" if params.intent_context else "" + ) + + user = ( + f"{last_feedback_section}" + f"User question: {query}{task_section}{intent_section}\n" + f"\nCurrent position: /{breadcrumb}\n" + f"Collected evidence:\n" + f"{evidence_summary}{missing_section}{keyword_section}{visited_section}{plan_section}\n" + f"{history_section}" + f"Remaining rounds: {remaining}/{max_rounds}\n" + f"\nCommand:" + ) + + return _WORKER_NAVIGATION_SYSTEM, user + + +# --------------------------------------------------------------------------- +# 2. 
Worker Dispatch (first-round prompt when Orchestrator dispatches) +# --------------------------------------------------------------------------- + +@dataclass +class WorkerDispatchParams: + original_query: str + task: str + doc_name: str + breadcrumb: str + + +def worker_dispatch(params: WorkerDispatchParams) -> tuple[str, str]: + """Build (system, user) prompt pair for the first round of a dispatched Worker.""" + doc_name = params.doc_name + original_query = params.original_query + task = params.task + breadcrumb = params.breadcrumb + + system = ( + f'You are a document navigation assistant. You are searching inside the document ' + f'"{doc_name}" for specific information.\n' + f"\n" + f"Available commands: ls, cd <name> (supports Section/Sub paths and /root/Section absolute paths), " + f"cd .., back, cat, cat <name>, head <name>, find <keyword>, findtree <pattern>, grep <regex>, " + f"toc [depth], stats <name>, grep_node <node> <pattern>, similar <name>, overview <name>, " + f"siblings <name>, ancestors <name>, doc_card, concepts, find_section <title>, " + f"compare <a> <b>, trace <name>, summarize <name>, " + f"wc <name>, pwd, check, done\n" + f"\n" + f"SEARCH STRATEGY:\n" + f"- Prefer find <keyword> to jump directly to relevant sections over manual ls→cd exploration.\n" + f"- When find results include content snippets that answer your task, cd to that node and cat it immediately.\n" + f"- Use multi-segment paths (e.g. 
cd Research Labs/Lab A) to reach targets in ONE command.\n" + f"- Do NOT ls before cd if find results already tell you which node to enter.\n" + f"- Use findtree when you know a section title pattern but not the exact name.\n" + f"\n" + f"Rules:\n" + f"- Output exactly ONE command per response.\n" + f"- Content from cat is automatically saved as evidence.\n" + f"- After cat collects evidence, if it relates to your task, use done immediately.\n" + f"- Do NOT grep after cat — cat already collected the full content.\n" + f"- If ls shows no children, use cat to read the current node or cd .. to go back.\n" + f"- When evidence is sufficient, use done." + ) + + user = ( + f"Original question: {original_query}\n" + f"Your task: {task}\n" + f"Document: {doc_name}\n" + f"Current position: /{breadcrumb}\n" + f"\nCommand:" + ) + + return system, user + + +# --------------------------------------------------------------------------- +# 3. Orchestrator Analysis (multi-doc Phase 1) +# --------------------------------------------------------------------------- + +@dataclass +class OrchestratorAnalysisParams: + query: str + doc_cards: str + find_results: str + intent_context: str + + +def orchestrator_analysis(params: OrchestratorAnalysisParams) -> tuple[str, str]: + """Build (system, user) prompt pair for Orchestrator document analysis.""" + system = ( + "You are a multi-document retrieval coordinator. 
Analyze the user's question, " + "review the available documents, and decide which documents to search and what to look for in each.\n" + "\n" + "Output format — for each relevant document, output a block:\n" + "- doc: <number>\n" + " reason: <why this document is relevant>\n" + " task: <what specific information to find in this document>\n" + "\n" + "Only include documents that are likely to contain relevant information.\n" + "If the cross-document search results already fully answer the question, respond with just: ALREADY_ANSWERED" + ) + + user = ( + f"Available documents:\n" + f"{params.doc_cards}\n" + f"\nCross-document search results:\n" + f"{params.find_results}\n" + f"{params.intent_context}\n" + f"\nUser question: {params.query}\n" + f"\nRelevant documents:" + ) + + return system, user + + +# --------------------------------------------------------------------------- +# 4. Check (evidence sufficiency evaluation) +# --------------------------------------------------------------------------- + +def check_sufficiency(query: str, evidence_summary: str) -> tuple[str, str]: + """Build (system, user) prompt pair for evidence sufficiency evaluation.""" + system = ( + "You evaluate whether collected evidence contains information that can answer or " + "relate to the user's question. The evidence is raw document text — it does not need to be " + "a complete or perfect answer. If the evidence mentions or addresses the key concepts from " + "the question, it is sufficient.\n" + "\n" + "Respond with ONLY 'SUFFICIENT' or 'INSUFFICIENT' followed by a one-line reason.\n" + "\n" + "Guidelines:\n" + "- If the evidence text contains any information directly related to the question's key terms, " + "respond SUFFICIENT.\n" + "- If the evidence is completely unrelated or empty, respond INSUFFICIENT.\n" + "- Default to SUFFICIENT unless the evidence is clearly irrelevant." 
+ ) + + user = ( + f"Question: {query}\n\n" + f"Collected evidence:\n" + f"{evidence_summary}\n\n" + f"Is this sufficient?" + ) + + return system, user + + +# --------------------------------------------------------------------------- +# 5. Navigation Planning (Phase 1.5) +# --------------------------------------------------------------------------- + +def build_plan_prompt( + query: str, + ls_output: str, + doc_name: str, + keyword_hints_section: str = "", + semantic_hints: str = "", + intent_signals: str = "", + task: str | None = None, +) -> tuple[str, str]: + """Build the Phase 1.5 navigation planning prompt.""" + task_section = f"\nYour specific task: {task}" if task else "" + + system = ( + "You are a document navigation planner. Given a user question, the top-level " + "document structure, keyword index matches, and semantic hints, output a brief navigation " + "plan: which sections to visit and in what order. Prioritize sections that matched keywords " + "or semantic hints. The plan should be 2-5 steps. Each step should be a specific action " + 'like "cd to X, then cat Y" or "grep for Z in current subtree". ' + "Pay attention to 'Can answer' and 'Topics' annotations in the structure listing — " + "they indicate what questions each section addresses. " + "Output only the plan, nothing else.\n\n" + 'Example plan for "What is the Q1 revenue?":\n' + "1. cd to Revenue (matched keyword 'revenue')\n" + "2. ls to see sub-sections\n" + "3. cat Q1 Report\n" + "4. check\n" + "5. done" + ) + + user = ( + f"Document: {doc_name}\n" + f"Top-level structure:\n{ls_output}{keyword_hints_section}{semantic_hints}{intent_signals}" + f"User question: {query}{task_section}\n\n" + f"Navigation plan:" + ) + + return system, user + + +# --------------------------------------------------------------------------- +# 6. 
Worker Re-plan (after insufficient check) +# --------------------------------------------------------------------------- + +def build_replan_prompt( + query: str, + task: str | None, + path_str: str, + evidence_summary: str, + missing_info: str, + visited_titles: str, + current_children: str, + sibling_hints: str, + remaining: int, + max_rounds: int, +) -> tuple[str, str]: + """Build a focused re-planning prompt when check returns INSUFFICIENT.""" + task_section = f"\nOriginal sub-task: {task}" if task else "" + + system = ( + "You are re-planning a document navigation strategy. The previous plan did not " + "find sufficient evidence. Given what's been found and what's still missing, generate a " + "focused 2-3 step plan. Each step should be a specific action like " + '"cd to X, then cat Y" or "grep for Z in current subtree". ' + "Prefer exploring unvisited branches. If current branch is exhausted, cd .. and try " + "a different path. Output only the plan, nothing else." + ) + + user = ( + f"Original question: {query}{task_section}\n" + f"Current position: /{path_str}\n" + f"Evidence collected so far:\n{evidence_summary}\n" + f"What's missing: {missing_info}\n" + f"Already visited: {visited_titles}\n" + f"{current_children}" + f"{sibling_hints}" + f"Remaining rounds: {remaining}/{max_rounds}\n\n" + f"Revised navigation plan:" + ) + + return system, user + + +# --------------------------------------------------------------------------- +# 7. 
Orchestrator Replan (after insufficient cross-doc evidence) +# --------------------------------------------------------------------------- + +def orchestrator_replan_prompt( + query: str, + missing_info: str, + evidence_summary: str, + dispatched_indices: list[int], + doc_cards: str, + keywords_text: str = "", +) -> tuple[str, str]: + """Build the Orchestrator re-dispatch prompt after insufficient evidence.""" + dispatched_text = ( + ", ".join(f"doc {i + 1}" for i in dispatched_indices) + if dispatched_indices + else "None" + ) + + system = ( + "You are a multi-document retrieval coordinator. The first round of evidence " + "collection was insufficient to fully answer the query. Review what was collected, " + "what's missing, and decide which additional documents to query.\n" + "\n" + "Output format — for each additional document to query, output a block:\n" + "- doc: <number>\n" + " reason: <why this document may have the missing information>\n" + " task: <what specific information to find>\n" + "\n" + "Only include documents not yet dispatched. If no additional documents are likely to help, " + "respond with: NO_ADDITIONAL_DOCS" + ) + + user = ( + f"Original question: {query}\n" + f"\nMissing information: {missing_info}\n" + f"\nCollected evidence so far:\n" + f"{evidence_summary}\n" + f"\nAlready dispatched documents: {dispatched_text}\n" + f"\nAvailable documents (all):\n" + f"{doc_cards}{keywords_text}\n" + f"\nAdditional documents to query:" + ) + + return system, user + + +# --------------------------------------------------------------------------- +# Parsing utilities +# --------------------------------------------------------------------------- + +def parse_dispatch_plan(llm_output: str, total_docs: int) -> list[DispatchEntry] | None: + """Parse the LLM output from orchestrator analysis into dispatch entries. + + Returns None if the response is "ALREADY_ANSWERED". + Returns empty list if no valid dispatch entries found. 
+ """ + trimmed = llm_output.strip() + + if trimmed.startswith("ALREADY_ANSWERED"): + return None + + entries: list[DispatchEntry] = [] + current_doc_idx: int | None = None + current_reason = "" + current_task = "" + + for line in trimmed.splitlines(): + line = line.strip() + + if line.startswith("- doc:"): + # Flush previous entry + if current_doc_idx is not None: + entries.append(DispatchEntry( + doc_idx=current_doc_idx, + reason=current_reason, + task=current_task, + )) + current_reason = "" + current_task = "" + + rest = line[len("- doc:"):].strip().rstrip(",") + try: + doc_num = int(rest) + except ValueError: + continue + if 0 < doc_num <= total_docs: + current_doc_idx = doc_num - 1 # Convert to 0-based + + elif line.startswith("reason:"): + current_reason = line[len("reason:"):].strip() + + elif line.startswith("task:"): + current_task = line[len("task:"):].strip() + + # Flush last entry + if current_doc_idx is not None: + entries.append(DispatchEntry( + doc_idx=current_doc_idx, + reason=current_reason, + task=current_task, + )) + + return entries + + +def parse_sufficiency_response(response: str) -> bool: + """Parse the sufficiency check response. Returns True if SUFFICIENT.""" + upper = response.strip().upper() + return upper.startswith("SUFFICIENT") and not upper.startswith("INSUFFICIENT") + + +def parse_replan_response( + response: str, + total_docs: int, + dispatched: list[int], +) -> list[DispatchEntry]: + """Parse the Orchestrator replan response into dispatch entries. + + Only includes documents not already dispatched. 
+ """ + trimmed = response.strip() + + if trimmed.startswith("NO_ADDITIONAL_DOCS"): + return [] + + entries: list[DispatchEntry] = [] + current_doc_idx: int | None = None + current_reason = "" + current_task = "" + + for line in trimmed.splitlines(): + line = line.strip() + + if line.startswith("- doc:"): + # Flush previous + if current_doc_idx is not None: + entries.append(DispatchEntry( + doc_idx=current_doc_idx, + reason=current_reason, + task=current_task, + )) + current_reason = "" + current_task = "" + + rest = line[len("- doc:"):].strip().rstrip(",") + try: + doc_num = int(rest) + except ValueError: + continue + if 0 < doc_num <= total_docs: + idx = doc_num - 1 + if idx not in dispatched: + current_doc_idx = idx + + elif line.startswith("reason:"): + current_reason = line[len("reason:"):].strip() + + elif line.startswith("task:"): + current_task = line[len("task:"):].strip() + + # Flush last + if current_doc_idx is not None: + entries.append(DispatchEntry( + doc_idx=current_doc_idx, + reason=current_reason, + task=current_task, + )) + + return entries diff --git a/vectorless/ask/tools.py b/vectorless/ask/tools.py new file mode 100644 index 00000000..ff9ec048 --- /dev/null +++ b/vectorless/ask/tools.py @@ -0,0 +1,119 @@ +"""LLM-dependent agent tools: compare, trace, summarize. + +These tools combine Rust compute primitives with LLM cognitive operations. +The Worker fetches content via Rust primitives, then these functions use LLM +for analysis. This keeps the "thick boundary" — Rust does compute, Python does +LLM-dependent strategy. +""" + +from __future__ import annotations + +import logging + +from vectorless.llm_client import LLMClient + +logger = logging.getLogger(__name__) + +_MAX_COMPARE_CHARS = 2000 +_MAX_TRACE_CHARS = 3000 +_MAX_SUMMARIZE_CHARS = 3000 + + +def _truncate(content: str, max_len: int) -> str: + if len(content) > max_len: + return content[:max_len] + "..." 
+ return content + + +async def compare_nodes( + title_a: str, + content_a: str, + title_b: str, + content_b: str, + llm: LLMClient, + *, + query: str = "", +) -> str: + """Compare two document sections using LLM.""" + content_a = _truncate(content_a, _MAX_COMPARE_CHARS) + content_b = _truncate(content_b, _MAX_COMPARE_CHARS) + + query_ctx = f"\nContext: the user asked: {query}" if query else "" + + system = ( + "You are a document analysis assistant. Compare the two sections below. " + "Identify key similarities and differences. Be specific and concise." + ) + user = ( + f"Section A: [{title_a}]\n{content_a}\n\n" + f"Section B: [{title_b}]\n{content_b}" + f"{query_ctx}\n\n" + f"Comparison:" + ) + + try: + return (await llm.complete(system, user)).strip() + except Exception as e: + logger.warning("Compare LLM call failed: %s", e) + return f"Comparison failed: {e}" + + +async def trace_reasoning( + title: str, + content: str, + related_context: str, + llm: LLMClient, + *, + query: str = "", +) -> str: + """Trace reasoning chain from a document section using LLM.""" + content = _truncate(content, _MAX_TRACE_CHARS) + + query_ctx = f"\nThe user asked: {query}" if query else "" + + system = ( + "You are a reasoning trace analyst. Given a document section, trace the logical " + "argument chain: identify premises, conclusions, supporting evidence, and logical " + "connections. If related sections are listed, note how they connect to the argument." 
+ ) + user = ( + f"Section: [{title}]\n{content}" + f"{related_context}" + f"{query_ctx}\n\n" + f"Reasoning trace:" + ) + + try: + return (await llm.complete(system, user)).strip() + except Exception as e: + logger.warning("Trace LLM call failed: %s", e) + return f"Trace failed: {e}" + + +async def summarize_section( + title: str, + content: str, + llm: LLMClient, + *, + query: str = "", +) -> str: + """Generate a dynamic LLM summary of a document section.""" + content = _truncate(content, _MAX_SUMMARIZE_CHARS) + + query_ctx = f"\nFocus the summary for the question: {query}" if query else "" + + system = ( + "You are a document summarizer. Provide a concise summary of the section below. " + "Highlight key facts, conclusions, and data points." + ) + user = ( + f"Section: [{title}]\n{content}" + f"{query_ctx}\n\n" + f"Summary:" + ) + + try: + return (await llm.complete(system, user)).strip() + except Exception as e: + logger.warning("Summarize LLM call failed: %s", e) + return f"Summarize failed: {e}" diff --git a/vectorless/ask/types.py b/vectorless/ask/types.py new file mode 100644 index 00000000..c9566693 --- /dev/null +++ b/vectorless/ask/types.py @@ -0,0 +1,350 @@ +"""Agent types — mirrors vectorless-agent/src/config.rs and state.rs. 
+ +Type hierarchy (matching Rust exactly): + + Evidence — single piece of collected evidence (mirrors rerank::types::Evidence) + TraceStep — single reasoning step + WorkerMetrics — per-Worker execution metrics + WorkerOutput — Worker output: evidence only, no answer + Metrics — aggregated Orchestrator-level metrics + Output — final result of a retrieval operation + + DocCard — lightweight document metadata for analysis + DispatchEntry — single dispatch target from Orchestrator analysis + EvalResult — evidence sufficiency evaluation result +""" + +from __future__ import annotations + +from dataclasses import dataclass, field + + +# --------------------------------------------------------------------------- +# Evidence — mirrors vectorless-rerank/src/types::Evidence +# --------------------------------------------------------------------------- + +@dataclass +class Evidence: + """A single piece of evidence collected during navigation. + + Replaces the old WorkerEvidence. The key difference: + - source_path: navigation breadcrumb (e.g. 
"Root/Chapter 1/Section 1.2") + - node_title: title of the node (replaces old 'title') + - doc_name: source document name (set by Orchestrator in multi-doc scenarios) + """ + + source_path: str + node_title: str + content: str + doc_name: str | None = None + + +# --------------------------------------------------------------------------- +# TraceStep — mirrors vectorless-document::TraceStep +# --------------------------------------------------------------------------- + +@dataclass +class TraceStep: + """A single step in the reasoning trace.""" + + action: str + observation: str + round: int + + +# --------------------------------------------------------------------------- +# Worker metrics — mirrors config::WorkerMetrics +# --------------------------------------------------------------------------- + +@dataclass +class WorkerMetrics: + """Metrics specific to a single Worker's execution.""" + + rounds_used: int = 0 + llm_calls: int = 0 + nodes_visited: int = 0 + budget_exhausted: bool = False + plan_generated: bool = False + check_count: int = 0 + evidence_chars: int = 0 + + +# --------------------------------------------------------------------------- +# Worker output — mirrors config::WorkerOutput +# --------------------------------------------------------------------------- + +@dataclass +class WorkerOutput: + """Output from a single Worker — pure evidence, no answer synthesis. + + Rerank handles all answer generation. 
+ """ + + evidence: list[Evidence] = field(default_factory=list) + metrics: WorkerMetrics = field(default_factory=WorkerMetrics) + doc_name: str = "" + trace_steps: list[TraceStep] = field(default_factory=list) + + +# --------------------------------------------------------------------------- +# Aggregated metrics — mirrors config::Metrics +# --------------------------------------------------------------------------- + +@dataclass +class Metrics: + """Agent execution metrics — aggregated across all Workers.""" + + rounds_used: int = 0 + llm_calls: int = 0 + nodes_visited: int = 0 + budget_exhausted: bool = False + plan_generated: bool = False + check_count: int = 0 + evidence_chars: int = 0 + + +# --------------------------------------------------------------------------- +# Output — mirrors config::Output (the final result) +# --------------------------------------------------------------------------- + +@dataclass +class Output: + """Final result of a retrieval operation. + + This is what Engine.ask() returns — aligned with Rust config::Output. + """ + + answer: str + evidence: list[Evidence] = field(default_factory=list) + metrics: Metrics = field(default_factory=Metrics) + confidence: float = 0.0 + trace_steps: list[TraceStep] = field(default_factory=list) + + @staticmethod + def empty() -> Output: + """Create an empty output (no evidence found).""" + return Output(answer="") + + +# --------------------------------------------------------------------------- +# DocCard — lightweight document metadata for Orchestrator analysis +# --------------------------------------------------------------------------- + +@dataclass +class DocCard: + """Summary of an ingested document, used for Orchestrator analysis. + + Built from DocumentInfo in Engine._ask_python(). 
+ """ + + doc_id: str + name: str + summary: str + section_count: int + concepts: list[str] = field(default_factory=list) + + +# --------------------------------------------------------------------------- +# DispatchEntry — mirrors prompts::DispatchEntry +# --------------------------------------------------------------------------- + +@dataclass +class DispatchEntry: + """A single dispatch target parsed from Orchestrator analysis.""" + + doc_idx: int + reason: str + task: str + + +# --------------------------------------------------------------------------- +# EvalResult — evidence sufficiency evaluation +# --------------------------------------------------------------------------- + +@dataclass +class EvalResult: + """Structured result of evidence sufficiency evaluation.""" + + sufficient: bool + missing_info: str + coverage: float = 0.0 + quality_score: float = 0.0 + missing_aspects: list[str] = field(default_factory=list) + relevant_evidence_ids: list[str] = field(default_factory=list) + + @property + def needs_replan(self) -> bool: + """Whether the Orchestrator should replan and dispatch more Workers.""" + return not self.sufficient and bool(self.missing_aspects) + + +# --------------------------------------------------------------------------- +# Scope — mirrors config::Scope +# --------------------------------------------------------------------------- + +@dataclass +class Specified: + """User specified one or more documents. + + Orchestrator skips analysis, spawns Workers directly. + """ + + docs: list[DocCard] + + +@dataclass +class Workspace: + """Workspace scope — user didn't specify documents. + + Orchestrator analyzes DocCards and selects relevant ones. 
+ """ + + docs: list[DocCard] + + +# Union type for scope +Scope = Specified | Workspace + + +# --------------------------------------------------------------------------- +# WorkerState — mirrors state::WorkerState +# --------------------------------------------------------------------------- + +@dataclass +class WorkerState: + """Mutable navigation state for a Worker loop. + + Created at loop start, destroyed at loop end. Never escapes the call. + """ + + breadcrumb: list[str] = field(default_factory=lambda: ["root"]) + evidence: list[Evidence] = field(default_factory=list) + visited: set[str] = field(default_factory=set) + collected_nodes: set[str] = field(default_factory=set) + remaining: int = 15 + max_rounds: int = 15 + last_feedback: str = "" + missing_info: str = "" + history: list[str] = field(default_factory=list) + plan: str = "" + check_count: int = 0 + plan_generated: bool = False + trace_steps: list[TraceStep] = field(default_factory=list) + llm_calls: int = 0 + + def dec_round(self) -> None: + if self.remaining > 0: + self.remaining -= 1 + + def set_feedback(self, feedback: str) -> None: + self.last_feedback = feedback + + def add_evidence(self, ev: Evidence) -> None: + self.evidence.append(ev) + + def has_evidence_for(self, node_id: str) -> bool: + return node_id in self.collected_nodes + + def push_history(self, entry: str) -> None: + if len(self.history) >= MAX_HISTORY_ENTRIES: + self.history.pop(0) + self.history.append(entry) + + def path_str(self) -> str: + return "/".join(self.breadcrumb) + + def evidence_summary(self) -> str: + if not self.evidence: + return "(none)" + return "\n".join( + f"- [{e.node_title}] {len(e.content)} chars" for e in self.evidence + ) + + def evidence_for_check(self) -> str: + if not self.evidence: + return "(no evidence collected yet)" + return "\n\n".join( + f"[{e.node_title}]\n{e.content}" for e in self.evidence + ) + + def history_text(self) -> str: + if not self.history: + return "(no history yet)" + return 
"\n".join( + f"{i + 1}. {h}" for i, h in enumerate(self.history) + ) + + def into_worker_output(self, doc_name: str) -> WorkerOutput: + """Convert this state into a WorkerOutput (consuming the evidence). + + Worker returns evidence only — no answer synthesis. + """ + evidence_chars: int = sum(len(e.content) for e in self.evidence) + return WorkerOutput( + evidence=list(self.evidence), + metrics=WorkerMetrics( + rounds_used=self.max_rounds - self.remaining, + llm_calls=self.llm_calls, + nodes_visited=len(self.visited), + budget_exhausted=self.remaining == 0, + plan_generated=self.plan_generated, + check_count=self.check_count, + evidence_chars=evidence_chars, + ), + doc_name=doc_name, + trace_steps=list(self.trace_steps), + ) + + +MAX_HISTORY_ENTRIES: int = 6 + + +# --------------------------------------------------------------------------- +# OrchestratorState — mirrors state::OrchestratorState +# --------------------------------------------------------------------------- + +@dataclass +class OrchestratorState: + """Mutable state for the Orchestrator loop. + + Tracks which documents have been dispatched and collects Worker results. 
+ """ + + dispatched: list[int] = field(default_factory=list) + sub_results: list[WorkerOutput] = field(default_factory=list) + all_evidence: list[Evidence] = field(default_factory=list) + analyze_done: bool = False + total_llm_calls: int = 0 + + def record_dispatch(self, doc_idx: int) -> None: + if doc_idx not in self.dispatched: + self.dispatched.append(doc_idx) + + def collect_result(self, doc_idx: int, result: WorkerOutput) -> None: + """Collect a Worker result, including its LLM call count.""" + for e in result.evidence: + if e.doc_name is None: + e.doc_name = result.doc_name + self.total_llm_calls += result.metrics.llm_calls + self.all_evidence.extend(result.evidence) + self.sub_results.append(result) + self.record_dispatch(doc_idx) + + def into_output(self, answer: str) -> Output: + """Merge all sub-results into a single Output.""" + trace_steps = [s for r in self.sub_results for s in r.trace_steps] + return Output( + answer=answer, + evidence=list(self.all_evidence), + metrics=Metrics( + llm_calls=self.total_llm_calls, + rounds_used=sum(r.metrics.rounds_used for r in self.sub_results), + nodes_visited=sum(r.metrics.nodes_visited for r in self.sub_results), + budget_exhausted=any(r.metrics.budget_exhausted for r in self.sub_results), + plan_generated=any(r.metrics.plan_generated for r in self.sub_results), + check_count=sum(r.metrics.check_count for r in self.sub_results), + evidence_chars=sum(r.metrics.evidence_chars for r in self.sub_results), + ), + confidence=0.0, + trace_steps=trace_steps, + ) diff --git a/vectorless/ask/understand.py b/vectorless/ask/understand.py new file mode 100644 index 00000000..75c0d626 --- /dev/null +++ b/vectorless/ask/understand.py @@ -0,0 +1,159 @@ +"""Query understanding — LLM-driven analysis of user queries. + +Mirrors vectorless-core/vectorless-query/src/understand.rs. 
+""" + +from __future__ import annotations + +import json +import logging +import re + +from vectorless.llm_client import LLMClient +from vectorless.ask.plan import Complexity, QueryIntent, QueryPlan, SubQuery + +logger = logging.getLogger(__name__) + + +async def understand( + query: str, + llm: LLMClient, +) -> QueryPlan: + """Analyze a user query using LLM to produce a structured QueryPlan. + + Two-phase: + 1. Extract keywords locally (BM25-style, no LLM) + 2. Call LLM for intent classification, concepts, strategy, complexity + + Raises on LLM failure — no silent degradation. + """ + keywords = _extract_keywords(query) + + system = ( + 'You are a query analysis engine. Analyze the user\'s query and respond with a JSON object containing:\n' + '\n' + '- "intent": one of "factual", "analytical", "navigational", "summary"\n' + '- "key_concepts": array of the main concepts/entities in the query (distinct from keywords)\n' + '- "strategy_hint": one of "focused" (single-topic), "exploratory" (broad scan), ' + '"comparative" (cross-reference), or "summary" (aggregate)\n' + '- "complexity": one of "simple", "moderate", "complex"\n' + '- "rewritten": optional rewritten version of the query for better retrieval (null if not needed)\n' + '- "sub_queries": array of sub-query strings if the query can be decomposed (empty array if not)\n' + '\n' + 'Respond with ONLY the JSON object, no additional text.' + ) + + user = f"Query: {query}\nExtracted keywords: [{', '.join(keywords)}]" + + response = await llm.complete(system, user) + + if not response.strip(): + raise ValueError( + "Query understanding failed: LLM returned an empty response. " + "Check your API key, model, and endpoint configuration." 
+ ) + + analysis = _parse_analysis(response) + + return QueryPlan( + original=query, + intent=_parse_intent(analysis.get("intent", "factual")), + keywords=keywords, + key_concepts=analysis.get("key_concepts", []), + strategy_hint=analysis.get("strategy_hint", ""), + complexity=_parse_complexity(analysis.get("complexity", "simple")), + rewritten=_filter_rewritten(analysis.get("rewritten")), + sub_queries=_parse_sub_queries(analysis.get("sub_queries", [])), + ) + + +def _extract_keywords(query: str) -> list[str]: + """Extract keywords from query using stop word filtering.""" + stop_words = { + "what", "is", "the", "a", "an", "how", "does", "do", "are", + "in", "on", "at", "to", "for", "of", "with", "and", "or", + "this", "that", "it", "from", "by", "was", "were", "be", + "can", "could", "would", "should", "will", "has", "have", + "had", "not", "but", "if", "then", "than", "so", "as", + "there", "their", "they", "its", "about", "which", "when", + "who", "whom", "all", "each", "every", "both", "few", + "more", "most", "other", "some", "such", "no", "nor", + "only", "own", "same", "too", "very", "just", "because", + } + words = re.findall(r"\b\w+\b", query.lower()) + return list(dict.fromkeys(w for w in words if w not in stop_words and len(w) > 2)) + + +def _parse_analysis(response: str) -> dict: + """Parse LLM response as JSON, handling markdown-wrapped output.""" + trimmed = response.strip() + + # Try to extract JSON from markdown code blocks + if trimmed.startswith("```"): + match = re.search(r"```(?:json)?\s*\n?(.*?)```", trimmed, re.DOTALL) + if match: + trimmed = match.group(1).strip() + + # Try to find a { ... 
} block + start = trimmed.find("{") + if start != -1: + depth = 0 + for i in range(start, len(trimmed)): + if trimmed[i] == "{": + depth += 1 + elif trimmed[i] == "}": + depth -= 1 + if depth == 0: + candidate = trimmed[start : i + 1] + try: + return json.loads(candidate) + except json.JSONDecodeError: + break + + # Last resort + return json.loads(trimmed) + + +def _parse_intent(raw: str) -> QueryIntent: + """Parse intent string to QueryIntent enum.""" + mapping = { + "factual": QueryIntent.FACTUAL, + "analytical": QueryIntent.ANALYTICAL, + "navigational": QueryIntent.NAVIGATIONAL, + "summary": QueryIntent.SUMMARY, + } + return mapping.get(raw.lower(), QueryIntent.FACTUAL) + + +def _parse_complexity(raw: str) -> Complexity: + """Parse complexity string to Complexity enum.""" + mapping = { + "simple": Complexity.SIMPLE, + "moderate": Complexity.MODERATE, + "complex": Complexity.COMPLEX, + } + return mapping.get(raw.lower(), Complexity.SIMPLE) + + +def _filter_rewritten(raw: str | None) -> list[str]: + """Extract rewritten queries from LLM response.""" + if raw is None or not isinstance(raw, str) or not raw.strip(): + return [] + return [raw.strip()] + + +def _parse_sub_queries(raw: list) -> list[SubQuery]: + """Parse sub_queries from LLM response.""" + if not isinstance(raw, list): + return [] + result = [] + for item in raw: + if isinstance(item, str) and item.strip(): + result.append(SubQuery(query=item.strip())) + elif isinstance(item, dict): + result.append(SubQuery( + query=item.get("query", ""), + intent=_parse_intent(item.get("intent", "factual")), + target_docs=item.get("target_docs"), + )) + return result diff --git a/vectorless/ask/worker.py b/vectorless/ask/worker.py new file mode 100644 index 00000000..3882b924 --- /dev/null +++ b/vectorless/ask/worker.py @@ -0,0 +1,1115 @@ +"""Worker agent — navigates a single document to collect evidence. + +The Worker uses an LLM-driven command loop: +1. Phase 0: Initial `ls` to observe the top-level structure +2. 
Phase 1.5 (optional): LLM generates a navigation plan from keyword hints +3. Phase 2: Main loop — LLM picks a command → execute → record trace → repeat +""" + +from __future__ import annotations + +import re +import logging +from dataclasses import dataclass +from typing import Any + +from vectorless.ask.types import TraceStep, Evidence, WorkerOutput, WorkerState +from vectorless.llm_client import LLMClient +from vectorless.ask.tools import compare_nodes, summarize_section, trace_reasoning +from vectorless.ask.prompts import ( + NavigationParams, + WorkerDispatchParams, + build_plan_prompt, + build_replan_prompt, + check_sufficiency, + parse_sufficiency_response, + worker_dispatch, + worker_navigation, +) + +logger = logging.getLogger(__name__) + +MAX_HISTORY_ENTRIES = 6 + + +# --------------------------------------------------------------------------- +# Command parsing +# --------------------------------------------------------------------------- + +@dataclass +class Command: + """Parsed command from LLM output.""" + kind: str # ls, cd, cd_up, cat, find, findtree, grep, head, wc, pwd, check, done, ... 
+ target: str = "" + target_b: str = "" # second target for compare, pattern for grep_node + lines: int = 20 + + +def _strip_quotes(s: str) -> str: + """Strip surrounding quotes (straight and smart) from a string.""" + trimmed = s.strip() + if len(trimmed) < 2: + return trimmed + first, last = trimmed[0], trimmed[-1] + matching = ( + (first == '"' and last == '"') + or (first == "'" and last == "'") + or (first == "“" and last == "”") + or (first == "‘" and last == "’") + ) + return trimmed[1:-1] if matching else trimmed + + +def parse_command(llm_output: str) -> Command: + """Parse the first non-empty line of LLM output into a Command.""" + line = "" + for l in llm_output.splitlines(): + if l.strip(): + line = l.strip() + break + + # Remove common wrapping + line = line.strip().strip("`").strip() + + parts = line.split() + if not parts: + return Command(kind="ls") + + cmd = parts[0].lower() + + if cmd == "ls": + return Command(kind="ls") + elif cmd == "cd": + if len(parts) >= 2 and parts[1] == "..": + return Command(kind="cd_up") + target = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="cd", target=target) + elif cmd == "cat": + target = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "." 
+ return Command(kind="cat", target=target) + elif cmd == "find": + keyword = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="find", target=keyword) + elif cmd == "findtree": + pattern = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="findtree", target=pattern) + elif cmd == "grep": + pattern = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="grep", target=pattern) + elif cmd == "head": + if len(parts) >= 4 and parts[1] == "-n": + target = _strip_quotes(" ".join(parts[3:])) + try: + n = int(parts[2]) + except ValueError: + n = 20 + return Command(kind="head", target=target, lines=n) + target = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="head", target=target) + elif cmd == "wc": + target = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="wc", target=target) + elif cmd == "pwd": + return Command(kind="pwd") + elif cmd == "check": + return Command(kind="check") + elif cmd == "done": + return Command(kind="done") + elif cmd == "back": + return Command(kind="back") + elif cmd == "toc": + if len(parts) > 1: + try: + return Command(kind="toc", lines=int(parts[1])) + except ValueError: + pass + return Command(kind="toc", lines=0) # 0 = no depth limit + elif cmd == "stats": + target = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="stats", target=target) + elif cmd == "grep_node": + # grep_node <target> <pattern> + if len(parts) >= 3: + return Command(kind="grep_node", target=parts[1], target_b=_strip_quotes(" ".join(parts[2:]))) + elif len(parts) == 2: + return Command(kind="grep_node", target=parts[1]) + return Command(kind="grep_node") + elif cmd == "similar": + target = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="similar", target=target) + elif cmd in ("section_overview", "overview"): + target = _strip_quotes(" 
".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="section_overview", target=target) + elif cmd == "compare": + # compare <node_a> <node_b> — use node IDs for reliability + if len(parts) >= 3: + return Command(kind="compare", target=parts[1], target_b=parts[2]) + elif len(parts) == 2: + return Command(kind="compare", target=parts[1]) + return Command(kind="compare") + elif cmd == "trace": + target = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="trace", target=target) + elif cmd == "summarize": + target = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="summarize", target=target) + elif cmd == "siblings": + target = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="siblings", target=target) + elif cmd == "ancestors": + target = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="ancestors", target=target) + elif cmd in ("doc_card", "card"): + return Command(kind="doc_card") + elif cmd == "concepts": + return Command(kind="concepts") + elif cmd == "find_section": + target = _strip_quotes(" ".join(parts[1:])) if len(parts) > 1 else "" + return Command(kind="find_section", target=target) + else: + return Command(kind="ls") # fallback: re-observe + + +def _is_parse_failure(command: Command, raw_output: str) -> bool: + """Detect if the parsed command is a fallback (unrecognized input).""" + trimmed = raw_output.strip() + return command.kind == "ls" and not trimmed.startswith("ls") and trimmed != "" + + +# --------------------------------------------------------------------------- +# Step result +# --------------------------------------------------------------------------- + +@dataclass +class Step: + """Result of a single command execution.""" + kind: str # continue, done, force_done + reason: str = "" + + +# --------------------------------------------------------------------------- +# Worker helpers +# 
--------------------------------------------------------------------------- + +async def _visited_titles(state: WorkerState, doc: Any) -> str: + """Format visited node titles for prompt context.""" + titles = [] + for node_id in state.visited: + try: + title = await doc.node_title(node_id) + if title: + titles.append(title) + except Exception: + pass + return ", ".join(titles) if titles else "(none)" + + +async def _resolve_target(doc: Any, target: str, state: WorkerState) -> str | None: + """Resolve a command target (node ID, child title, or empty for current) to a node ID.""" + if not target or target == ".": + return await doc.current_id() + if re.match(r"^n\d+$", target): + return target + children = await doc.ls() + for child in children: + if child.title.lower() == target.lower(): + return child.id + for child in children: + if target.lower() in child.title.lower(): + return child.id + return None + + +# --------------------------------------------------------------------------- +# Command execution +# --------------------------------------------------------------------------- + +async def _execute_command( + command: Command, + doc: Any, + state: WorkerState, + query: str, + llm: LLMClient, +) -> Step: + """Execute a parsed command against the PyDocument. 
Returns Step result.""" + kind = command.kind + + if kind == "ls": + children = await doc.ls() + if not children: + state.last_feedback = "(no navigation data)" + else: + lines = [] + for i, child in enumerate(children, 1): + hints = getattr(child, "question_hints", []) + tags = getattr(child, "topic_tags", []) + annotations = [] + if hints: + for h in hints[:2]: + annotations.append(f'question "{h}"') + if tags: + for t in tags[:2]: + annotations.append(f'topic "{t}"') + ann_str = f", {', '.join(annotations)}" if annotations else "" + lines.append( + f"[{i}] {child.title} — " + f"(depth {child.depth}, {child.leaf_count} leaves{ann_str})" + ) + state.last_feedback = "\n".join(lines) + state.visited.add(await doc.current_id()) + return Step(kind="continue") + + elif kind == "cd": + target = command.target + if not target: + state.last_feedback = "Usage: cd <name>" + return Step(kind="continue") + + # Try cd by node id first (if target looks like n42) + if re.match(r"^n\d+$", target): + try: + await doc.cd(target) + title = await doc.node_title(target) + state.breadcrumb.append(title) + current = await doc.current_id() + state.visited.add(current) + state.last_feedback = f"Entered '{title}'" + return Step(kind="continue") + except Exception: + pass + + # Try cd_by_title + try: + await doc.cd_by_title(target) + current = await doc.current_id() + title = await doc.node_title(current) + state.breadcrumb.append(title) + state.visited.add(current) + state.last_feedback = f"Entered '{title}'" + return Step(kind="continue") + except Exception: + state.last_feedback = f"Node '{target}' not found. Use ls to list children." 
+ return Step(kind="continue") + + elif kind == "cd_up": + try: + await doc.cd_up() + if len(state.breadcrumb) > 1: + state.breadcrumb.pop() + state.last_feedback = f"Current position: /{state.path_str()}" + except Exception as e: + state.last_feedback = f"Cannot go up: {e}" + return Step(kind="continue") + + elif kind == "cat": + target = command.target + node_id = None + + if target == "." or target == "": + node_id = await doc.current_id() + elif re.match(r"^n\d+$", target): + node_id = target + else: + # Try to find by title among children + children = await doc.ls() + for child in children: + if child.title.lower() == target.lower(): + node_id = child.id + break + if target.lower() in child.title.lower(): + node_id = child.id + break + if node_id is None: + # Try find + results = await doc.find(target) + if results: + node_id = results[0].node_id + + if node_id is None: + state.last_feedback = f"Node '{target}' not found." + return Step(kind="continue") + + if node_id in state.collected_nodes: + state.last_feedback = f"Already collected evidence from '{target}'. Use done if sufficient." + return Step(kind="continue") + + try: + content = await doc.cat(node_id) + title = await doc.node_title(node_id) + pwd = await doc.pwd() + + evidence = Evidence( + source_path=pwd, + node_title=title, + content=content, + ) + state.evidence.append(evidence) + state.collected_nodes.add(node_id) + state.visited.add(node_id) + + preview = content[:500] + "..." 
if len(content) > 500 else content + state.last_feedback = f"[{title}] collected as evidence:\n{preview}" + return Step(kind="continue") + except Exception as e: + state.last_feedback = f"Error reading node: {e}" + return Step(kind="continue") + + elif kind == "find": + keyword = command.target + if not keyword: + state.last_feedback = "Usage: find <keyword>" + return Step(kind="continue") + + try: + results = await doc.find(keyword) + except Exception: + results = [] + + if not results: + # Fallback: try keyword_entries for reasoning index hits + try: + entries = await doc.keyword_entries(keyword) + if entries: + lines = [f"Results for '{keyword}':"] + for entry in entries: + title = await doc.node_title(entry.node_id) + lines.append( + f" - {title} (depth {entry.depth}, weight {entry.weight:.2f})" + ) + state.last_feedback = "\n".join(lines) + return Step(kind="continue") + except Exception: + pass + state.last_feedback = f"No results for '{keyword}'." + return Step(kind="continue") + + lines = [f"Results for '{keyword}':"] + for r in results[:10]: + lines.append(f" - {r.title} (depth {r.depth}, {r.leaf_count} leaves)") + state.last_feedback = "\n".join(lines) + return Step(kind="continue") + + elif kind == "findtree": + pattern = command.target + if not pattern: + state.last_feedback = "Usage: findtree <pattern>" + return Step(kind="continue") + + try: + results = await doc.find(pattern) + except Exception: + results = [] + + if not results: + state.last_feedback = f"No nodes matching '{pattern}' in titles." 
+ return Step(kind="continue") + + lines = [f"Nodes matching '{pattern}':"] + for r in results[:15]: + lines.append(f" - {r.title} (depth {r.depth})") + state.last_feedback = "\n".join(lines) + return Step(kind="continue") + + elif kind == "grep": + pattern = command.target + if not pattern: + state.last_feedback = "Usage: grep <pattern>" + return Step(kind="continue") + try: + matches = await doc.grep(pattern) + except Exception as e: + state.last_feedback = f"grep error: {e}" + return Step(kind="continue") + + if not matches: + state.last_feedback = f"No matches for /{pattern}/." + return Step(kind="continue") + + lines = [f"Matches for /{pattern}/:"] + for m in matches[:15]: + lines.append(f" - {m.title} (line {m.line_number}): {m.snippet[:100]}") + state.last_feedback = "\n".join(lines) + return Step(kind="continue") + + elif kind == "head": + target = command.target + n = command.lines + node_id = None + + if re.match(r"^n\d+$", target): + node_id = target + else: + children = await doc.ls() + for child in children: + if child.title.lower() == target.lower(): + node_id = child.id + break + + if node_id is None: + state.last_feedback = f"Node '{target}' not found." + return Step(kind="continue") + + try: + content = await doc.head(node_id, n) + state.last_feedback = content + except Exception as e: + state.last_feedback = f"head error: {e}" + return Step(kind="continue") + + elif kind == "wc": + target = command.target + node_id = None + + if not target: + node_id = await doc.current_id() + elif re.match(r"^n\d+$", target): + node_id = target + else: + children = await doc.ls() + for child in children: + if child.title.lower() == target.lower(): + node_id = child.id + break + + if node_id is None: + state.last_feedback = f"Node '{target}' not found." 
+ return Step(kind="continue") + + try: + wc = await doc.wc(node_id) + state.last_feedback = f"{wc.lines} lines, {wc.words} words, {wc.chars} chars" + except Exception as e: + state.last_feedback = f"wc error: {e}" + return Step(kind="continue") + + elif kind == "pwd": + try: + pwd = await doc.pwd() + state.last_feedback = f"/{pwd}" + except Exception as e: + state.last_feedback = f"pwd error: {e}" + return Step(kind="continue") + + elif kind == "back": + try: + await doc.back() + pwd = await doc.pwd() + state.breadcrumb = [p for p in pwd.split("/") if p] + state.last_feedback = f"Current position: /{state.path_str()}" + except Exception as e: + state.last_feedback = f"Cannot go back: {e}" + return Step(kind="continue") + + elif kind == "toc": + try: + if command.lines > 0: + entries = await doc.toc(command.lines) + else: + entries = await doc.toc() + if not entries: + state.last_feedback = "(empty table of contents)" + else: + lines = ["Table of contents:"] + for entry in entries: + indent = " " * entry.depth + children = f" ({entry.child_count} children)" if entry.child_count > 0 else "" + lines.append(f"{indent}- {entry.title}{children}") + state.last_feedback = "\n".join(lines) + except Exception as e: + state.last_feedback = f"toc error: {e}" + return Step(kind="continue") + + elif kind == "stats": + node_id = await _resolve_target(doc, command.target, state) + if node_id is None: + state.last_feedback = f"Node '{command.target}' not found." 
+ return Step(kind="continue") + try: + s = await doc.stats(node_id) + leaf = " (leaf)" if s.is_leaf else "" + state.last_feedback = ( + f"[{s.title}] depth={s.depth}, children={s.child_count}, " + f"leaves={s.leaf_count}, chars={s.char_count}, words={s.word_count}{leaf}" + ) + except Exception as e: + state.last_feedback = f"stats error: {e}" + return Step(kind="continue") + + elif kind == "grep_node": + target = command.target + pattern = command.target_b + if not target or not pattern: + state.last_feedback = "Usage: grep_node <node> <pattern>" + return Step(kind="continue") + node_id = await _resolve_target(doc, target, state) + if node_id is None: + state.last_feedback = f"Node '{target}' not found." + return Step(kind="continue") + try: + matches = await doc.grep_node(node_id, pattern) + if not matches: + state.last_feedback = f"No matches for /{pattern}/ in this node." + else: + lines = [f"Matches for /{pattern}/:"] + for m in matches[:15]: + lines.append(f" - line {m.line_number}: {m.snippet[:100]}") + state.last_feedback = "\n".join(lines) + except Exception as e: + state.last_feedback = f"grep_node error: {e}" + return Step(kind="continue") + + elif kind == "similar": + node_id = await _resolve_target(doc, command.target, state) + if node_id is None: + state.last_feedback = f"Node '{command.target}' not found." + return Step(kind="continue") + try: + results = await doc.similar(node_id) + if not results: + state.last_feedback = "No similar nodes found." + else: + lines = ["Similar nodes:"] + for r in results[:10]: + kw = ", ".join(r.shared_keywords[:3]) + lines.append(f" - {r.title} (relevance: {r.relevance:.2f}, shared: {kw})") + state.last_feedback = "\n".join(lines) + except Exception as e: + state.last_feedback = f"similar error: {e}" + return Step(kind="continue") + + elif kind == "section_overview": + node_id = await _resolve_target(doc, command.target, state) + if node_id is None: + state.last_feedback = f"Node '{command.target}' not found." 
+ return Step(kind="continue") + try: + overview = await doc.section_overview(node_id) + state.last_feedback = overview if overview else "(no overview available)" + except Exception as e: + state.last_feedback = f"overview error: {e}" + return Step(kind="continue") + + elif kind == "compare": + target_a = command.target + target_b = command.target_b + if not target_a or not target_b: + state.last_feedback = "Usage: compare <node_a> <node_b>" + return Step(kind="continue") + node_a = await _resolve_target(doc, target_a, state) + node_b = await _resolve_target(doc, target_b, state) + if node_a is None: + state.last_feedback = f"Node '{target_a}' not found." + return Step(kind="continue") + if node_b is None: + state.last_feedback = f"Node '{target_b}' not found." + return Step(kind="continue") + try: + content_a = await doc.cat(node_a) + title_a = await doc.node_title(node_a) + content_b = await doc.cat(node_b) + title_b = await doc.node_title(node_b) + if node_a not in state.collected_nodes: + pwd_a = await doc.pwd() + state.evidence.append(Evidence( + source_path=pwd_a, node_title=title_a, content=content_a, + )) + state.collected_nodes.add(node_a) + if node_b not in state.collected_nodes: + pwd_b = await doc.pwd() + state.evidence.append(Evidence( + source_path=pwd_b, node_title=title_b, content=content_b, + )) + state.collected_nodes.add(node_b) + result = await compare_nodes(title_a, content_a, title_b, content_b, llm, query=query) + state.llm_calls += 1 + state.last_feedback = f"Comparison of [{title_a}] vs [{title_b}]:\n{result}" + except Exception as e: + state.last_feedback = f"compare error: {e}" + return Step(kind="continue") + + elif kind == "trace": + node_id = await _resolve_target(doc, command.target, state) + if node_id is None: + state.last_feedback = f"Node '{command.target}' not found." 
+ return Step(kind="continue") + try: + content = await doc.cat(node_id) + title = await doc.node_title(node_id) + if node_id not in state.collected_nodes: + pwd = await doc.pwd() + state.evidence.append(Evidence( + source_path=pwd, node_title=title, content=content, + )) + state.collected_nodes.add(node_id) + related_context = "" + try: + similar = await doc.similar(node_id) + if similar: + related_lines = [f" - {s.title} (relevance: {s.relevance:.2f})" for s in similar[:5]] + related_context = "\nRelated sections:\n" + "\n".join(related_lines) + except Exception: + pass + result = await trace_reasoning(title, content, related_context, llm, query=query) + state.llm_calls += 1 + state.last_feedback = f"Reasoning trace for [{title}]:\n{result}" + except Exception as e: + state.last_feedback = f"trace error: {e}" + return Step(kind="continue") + + elif kind == "summarize": + node_id = await _resolve_target(doc, command.target, state) + if node_id is None: + state.last_feedback = f"Node '{command.target}' not found." + return Step(kind="continue") + try: + content = await doc.cat(node_id) + title = await doc.node_title(node_id) + if node_id not in state.collected_nodes: + pwd = await doc.pwd() + state.evidence.append(Evidence( + source_path=pwd, node_title=title, content=content, + )) + state.collected_nodes.add(node_id) + result = await summarize_section(title, content, llm, query=query) + state.llm_calls += 1 + state.last_feedback = f"Summary of [{title}]:\n{result}" + except Exception as e: + state.last_feedback = f"summarize error: {e}" + return Step(kind="continue") + + elif kind == "siblings": + node_id = await _resolve_target(doc, command.target, state) + if node_id is None: + state.last_feedback = f"Node '{command.target}' not found." 
+ return Step(kind="continue") + try: + siblings = await doc.siblings(node_id) + if not siblings: + state.last_feedback = "(no sibling nodes)" + else: + lines = ["Sibling nodes:"] + for s in siblings: + lines.append( + f" - {s.title} (depth {s.depth}, {s.leaf_count} leaves)" + ) + state.last_feedback = "\n".join(lines) + except Exception as e: + state.last_feedback = f"siblings error: {e}" + return Step(kind="continue") + + elif kind == "ancestors": + node_id = await _resolve_target(doc, command.target, state) + if node_id is None: + state.last_feedback = f"Node '{command.target}' not found." + return Step(kind="continue") + try: + ancestors = await doc.ancestors(node_id) + if not ancestors: + state.last_feedback = "(at root, no ancestors)" + else: + lines = ["Path from root:"] + for a in ancestors: + lines.append( + f" {' ' * a.depth}→ {a.title} (depth {a.depth}, {a.child_count} children)" + ) + state.last_feedback = "\n".join(lines) + except Exception as e: + state.last_feedback = f"ancestors error: {e}" + return Step(kind="continue") + + elif kind == "doc_card": + try: + card = await doc.doc_card() + if card is None: + state.last_feedback = "(no document card available)" + else: + lines = [ + f"Document: {card.title}", + f"Overview: {card.overview}", + f"Total leaves: {card.total_leaves}", + ] + if card.question_hints: + lines.append(f"Can answer: {', '.join(card.question_hints[:5])}") + if card.topic_tags: + lines.append(f"Topics: {', '.join(card.topic_tags[:5])}") + if card.sections: + lines.append("Top-level sections:") + for s in card.sections: + lines.append(f" - {s.title}: {s.description} ({s.leaf_count} leaves)") + state.last_feedback = "\n".join(lines) + except Exception as e: + state.last_feedback = f"doc_card error: {e}" + return Step(kind="continue") + + elif kind == "concepts": + try: + concepts = await doc.concepts() + if not concepts: + state.last_feedback = "(no concepts extracted)" + else: + lines = ["Key concepts:"] + for c in concepts: + 
sections = ", ".join(c.sections[:3]) + lines.append(f" - {c.name}: {c.summary} (in: {sections})") + state.last_feedback = "\n".join(lines) + except Exception as e: + state.last_feedback = f"concepts error: {e}" + return Step(kind="continue") + + elif kind == "find_section": + title = command.target + if not title: + state.last_feedback = "Usage: find_section <title>" + return Step(kind="continue") + try: + result = await doc.find_section(title) + if result is None: + state.last_feedback = f"No section with title '{title}'." + else: + state.last_feedback = ( + f"Found: {result.title} (id={result.node_id}, " + f"depth {result.depth}, {result.leaf_count} leaves)" + ) + except Exception as e: + state.last_feedback = f"find_section error: {e}" + return Step(kind="continue") + + elif kind == "check": + evidence_text = state.evidence_for_check() + system, user = check_sufficiency(query, evidence_text) + + try: + response = await llm.complete(system, user) + except Exception as e: + logger.warning("Check LLM call failed: %s", e) + state.last_feedback = "Could not evaluate sufficiency." + return Step(kind="continue") + + state.llm_calls += 1 + state.check_count += 1 + sufficient = parse_sufficiency_response(response) + + if sufficient: + state.last_feedback = "Evidence is sufficient. Use done to finish." + return Step(kind="done") + else: + # Extract missing info + reason = response.strip() + for prefix in ("INSUFFICIENT", "Insufficient"): + if reason.startswith(prefix): + reason = reason[len(prefix):] + break + reason = reason.lstrip("-: ") + if reason: + state.missing_info = reason + state.last_feedback = f"Evidence not yet sufficient: {response.strip()}" + return Step(kind="continue") + + elif kind == "done": + state.last_feedback = "Navigation complete." 
# ---------------------------------------------------------------------------
# Worker
# ---------------------------------------------------------------------------

class Worker:
    """Navigates a single document to collect evidence for a query.

    The Worker runs an observe→decide→act loop: it shows the LLM the current
    position in the document tree, parses the LLM's reply into a navigation
    command, executes it, and repeats until the evidence is judged sufficient
    or the round/LLM-call budget runs out.

    Usage::

        worker = Worker(document=doc, query="What is the revenue?", llm_client=llm)
        result = await worker.run()
    """

    def __init__(
        self,
        document: Any,
        query: str,
        llm_client: LLMClient,
        *,
        max_rounds: int = 15,
        max_llm_calls: int = 0,
        task: str | None = None,
        intent_context: str = "",
    ) -> None:
        """Store navigation configuration.

        Args:
            document: Document handle. NOTE(review): assumed to expose the
                async navigation API used below (ls/cd/cat/root_id/doc_name/
                keyword_entries/node_title) — confirm against the doc adapter.
            query: The user's natural-language question.
            llm_client: LLM used for every decision and planning call.
            max_rounds: Maximum navigation rounds before giving up.
            max_llm_calls: Hard LLM-call budget; 0 means unbounded.
            task: Optional sub-task; when set, the first round uses the
                dispatch prompt instead of the navigation prompt.
            intent_context: Extra context passed through to the prompt builder.
        """
        self._doc = document
        self._query = query
        self._llm = llm_client
        self._max_rounds = max_rounds
        self._max_llm_calls = max_llm_calls
        self._task = task
        self._intent_context = intent_context

    async def run(self) -> WorkerOutput:
        """Execute the Worker navigation loop and return collected evidence."""
        doc = self._doc
        query = self._query
        llm = self._llm
        task = self._task
        max_rounds = self._max_rounds
        max_llm = self._max_llm_calls
        intent_context = self._intent_context

        state = WorkerState(remaining=max_rounds, max_rounds=max_rounds)

        # Phase 0: initial ls to observe environment
        root_id = await doc.root_id()
        state.visited.add(root_id)

        try:
            children = await doc.ls()
            if children:
                lines = []
                for i, child in enumerate(children, 1):
                    lines.append(
                        f"[{i}] {child.title} — "
                        f"(depth {child.depth}, {child.leaf_count} leaves)"
                    )
                state.last_feedback = "\n".join(lines)
            else:
                state.last_feedback = "(no children at root)"
        except Exception as e:
            state.last_feedback = f"Initial ls failed: {e}"

        # Phase 1.5: optional navigation planning (best-effort; failures are
        # non-fatal because hints/plan only steer the prompt).
        keyword_hints = ""
        try:
            keyword_hints = await self._build_keyword_hints(doc, query)
        except Exception:
            pass

        if keyword_hints:
            await self._generate_plan(doc, query, task, state, keyword_hints, llm)

        # Phase 2: main navigation loop
        use_dispatch = task is not None

        while state.remaining > 0:
            if max_llm > 0 and state.llm_calls >= max_llm:
                logger.info("LLM call budget exhausted (%d/%d)", state.llm_calls, max_llm)
                break

            # Build prompt: dispatch prompt only on the very first round of a
            # tasked worker; navigation prompt otherwise.
            if use_dispatch and state.remaining == max_rounds:
                system, user = worker_dispatch(WorkerDispatchParams(
                    original_query=query,
                    task=task or query,
                    doc_name=await doc.doc_name(),
                    breadcrumb=state.path_str(),
                ))
            else:
                visited_titles = await _visited_titles(state, doc)
                system, user = worker_navigation(NavigationParams(
                    query=query,
                    task=task,
                    breadcrumb=state.path_str(),
                    evidence_summary=state.evidence_summary(),
                    missing_info=state.missing_info,
                    last_feedback=state.last_feedback,
                    remaining=state.remaining,
                    max_rounds=state.max_rounds,
                    history=state.history_text(),
                    visited_titles=visited_titles,
                    plan=state.plan,
                    intent_context=intent_context,
                    keyword_hints=keyword_hints,
                ))

            # LLM decision
            round_num = max_rounds - state.remaining + 1
            try:
                llm_output = await llm.complete(system, user)
            except Exception as e:
                logger.error("LLM call failed at round %d: %s", round_num, e)
                break
            state.llm_calls += 1

            # Parse command
            command = parse_command(llm_output)
            is_failure = _is_parse_failure(command, llm_output)

            if is_failure:
                raw_preview = llm_output.strip()[:200]
                if len(llm_output.strip()) > 200:
                    raw_preview += "..."
                state.last_feedback = (
                    f"Your output was not recognized as a valid command:\n"
                    f'"{raw_preview}"\n\n'
                    f"Please output exactly one command "
                    f"(ls, cd, cat, head, find, grep, toc, stats, similar, overview, "
                    f"siblings, ancestors, doc_card, concepts, find_section, "
                    f"compare, trace, summarize, wc, pwd, check, or done)."
                )
                state.push_history("(unrecognized) → parse failure")
                # BUGFIX: consume a round on parse failure. Previously this
                # path `continue`d without decrementing, so with the default
                # unbounded LLM budget (max_llm == 0) an LLM that keeps
                # producing unparseable output spins forever.
                state.remaining -= 1
                continue

            is_check = command.kind == "check"

            # Execute
            step = await _execute_command(command, doc, state, query, llm)

            # Re-plan after insufficient check
            if is_check:
                await self._handle_replan(query, task, doc, state, llm, max_llm)

            # Record history and trace
            cmd_str = command.kind
            if command.target:
                cmd_str += f" {command.target}"

            feedback_preview = state.last_feedback
            if len(feedback_preview) > 120:
                feedback_preview = feedback_preview[:120] + "..."
            state.push_history(f"{cmd_str} → {feedback_preview}")

            round_num_done = max_rounds - state.remaining
            state.trace_steps.append(TraceStep(
                action=cmd_str,
                observation=state.last_feedback[:200],
                round=round_num_done,
            ))

            # Check termination. Note: `check` commands are free — they do not
            # consume a navigation round (only LLM budget).
            if step.kind == "done":
                break
            elif step.kind == "force_done":
                break
            else:
                if not is_check:
                    state.remaining -= 1

        doc_name = ""
        try:
            doc_name = await doc.doc_name()
        except Exception:
            pass

        return state.into_worker_output(doc_name)

    async def _build_keyword_hints(self, doc: Any, query: str) -> str:
        """Build keyword hints from the document's reasoning index.

        Extracts content words from the query (stop words and short tokens
        removed), looks each up in the document's keyword index, and renders
        up to 3 matching node titles per keyword as prompt-ready hint lines.
        Returns "" when nothing useful is found.
        """
        # Extract simple keywords from the query
        stop_words = {
            "what", "is", "the", "a", "an", "how", "does", "do", "are",
            "in", "on", "at", "to", "for", "of", "with", "and", "or",
            "this", "that", "it", "from", "by", "was", "were", "be",
        }
        words = re.findall(r"\b\w+\b", query.lower())
        keywords = [w for w in words if w not in stop_words and len(w) > 2]

        if not keywords:
            return ""

        hints = []
        for kw in keywords[:5]:  # limit keywords
            try:
                entries = await doc.keyword_entries(kw)
                for entry in entries[:3]:
                    title = await doc.node_title(entry.node_id)
                    hints.append(
                        f" - '{kw}' → {title} (weight {entry.weight:.2f})"
                    )
            except Exception:
                # Missing index entries are expected; skip silently.
                pass

        if not hints:
            return ""

        return "Keyword matches (use find <keyword> to jump directly):\n" + "\n".join(hints) + "\n"

    async def _generate_plan(
        self,
        doc: Any,
        query: str,
        task: str | None,
        state: WorkerState,
        keyword_hints: str,
        llm: LLMClient,
    ) -> None:
        """Phase 1.5: generate a navigation plan from keyword hints.

        Stores the plan on ``state.plan``; a failed LLM call only logs a
        warning (planning is optional).
        """
        # state.last_feedback still holds the Phase-0 ls rendering here.
        ls_output = state.last_feedback
        doc_name = await doc.doc_name()

        system, user = build_plan_prompt(
            query=query,
            ls_output=ls_output,
            doc_name=doc_name,
            keyword_hints_section=f"\n{keyword_hints}" if keyword_hints else "",
            task=task,
        )

        try:
            plan = await llm.complete(system, user)
            state.llm_calls += 1
            plan_text = plan.strip()
            if plan_text:
                state.plan = plan_text
                state.plan_generated = True
        except Exception as e:
            logger.warning("Plan generation failed: %s", e)

    async def _handle_replan(
        self,
        query: str,
        task: str | None,
        doc: Any,
        state: WorkerState,
        llm: LLMClient,
        max_llm: int,
    ) -> None:
        """Dynamic re-planning after an insufficient check.

        Only runs when the check left ``state.missing_info`` set. Skips (and
        clears the stale plan) when fewer than 3 rounds remain or the LLM
        budget is exhausted. Always clears ``missing_info`` on exit.
        """
        if not state.missing_info:
            return

        if state.remaining < 3:
            state.plan = ""
            state.missing_info = ""
            return

        if max_llm > 0 and state.llm_calls >= max_llm:
            state.plan = ""
            state.missing_info = ""
            return

        # Build sibling hints.
        # NOTE(review): sibling_hints is currently always passed empty to
        # build_replan_prompt — confirm whether sibling enumeration was
        # intended here.
        sibling_hints = ""
        current_children = "Current position is a leaf node — consider cd .. to go back.\n"

        try:
            children = await doc.ls()
            if children:
                items = [f" - {c.title} ({c.leaf_count} leaves)" for c in children]
                current_children = "Children at current position:\n" + "\n".join(items) + "\n"
        except Exception:
            pass

        system, user = build_replan_prompt(
            query=query,
            task=task,
            path_str=state.path_str(),
            evidence_summary=state.evidence_summary(),
            missing_info=state.missing_info,
            visited_titles=await _visited_titles(state, doc),
            current_children=current_children,
            sibling_hints=sibling_hints,
            remaining=state.remaining,
            max_rounds=state.max_rounds,
        )

        try:
            new_plan = await llm.complete(system, user)
            state.llm_calls += 1
            plan_text = new_plan.strip()
            if plan_text:
                logger.info("Re-plan generated: %s", plan_text[:200])
                state.plan = plan_text
        except Exception as e:
            logger.warning("Re-plan LLM call failed: %s", e)

        state.missing_info = ""
""" - from vectorless.session import Session + from vectorless.engine import Engine config_path = os.path.join(workspace_dir, "config.toml") if os.path.exists(config_path): - return Session.from_config_file(config_path) - return Session.from_env() + return Engine.from_config_file(config_path) + return Engine.from_env() def add_cmd( @@ -37,27 +37,22 @@ def add_cmd( jobs: int = 1, verbose: bool = False, ) -> None: - """Index a document or directory. + """Compile a document or directory. Args: path: File or directory path. - recursive: Index directory recursively. + recursive: Compile directory recursively. fmt: Force format ("markdown" | "pdf" | None for auto-detect). - force: Force re-index existing documents. - jobs: Number of parallel indexing jobs. + force: Force re-compile existing documents. + jobs: Number of parallel compile jobs. verbose: Show detailed progress. - - Uses: - Engine.index(IndexContext) - IndexContext.from_path / from_paths / from_dir - IndexOptions(mode="force" if force else "default") """ workspace = get_workspace_path() try: - session = _create_session(workspace) + session = _create_engine(workspace) except Exception as e: - raise click.ClickException(f"Failed to create session: {e}") from e + raise click.ClickException(f"Failed to create engine: {e}") from e target = Path(path).resolve() format_hint = fmt or "markdown" @@ -86,16 +81,16 @@ async def _run(): ) if verbose: - click.echo(f"Found {len(file_paths)} document(s) to index") + click.echo(f"Found {len(file_paths)} document(s) to compile") - results = await session.index_batch( + results = await session.compile_batch( file_paths, mode="force" if force else "default", jobs=jobs ) succeeded = [r for r in results if not r.has_failures()] failed = [r for r in results if r.has_failures()] - click.echo(f"Indexed {len(succeeded)}/{len(results)} document(s) successfully") + click.echo(f"Compiled {len(succeeded)}/{len(results)} document(s) successfully") if failed: click.echo(f"Failed: 
{len(failed)} document(s)") for f_result in failed: @@ -107,18 +102,18 @@ async def _run(): for item in r.items: click.echo(f" {item.name} ({item.doc_id})") else: - result = await session.index( + result = await session.compile( path=str(target), format=format_hint, mode="force" if force else "default", ) if result.doc_id: - click.echo(f"Indexed: {result.doc_id}") + click.echo(f"Compiled: {result.doc_id}") else: # Batch result from single file for item in result.items: - click.echo(f"Indexed: {item.name} ({item.doc_id})") + click.echo(f"Compiled: {item.name} ({item.doc_id})") if result.has_failures(): for item in result.failed: @@ -140,4 +135,4 @@ async def _run(): except click.ClickException: raise except Exception as e: - raise click.ClickException(f"Indexing failed: {e}") from e + raise click.ClickException(f"Compile failed: {e}") from e diff --git a/vectorless/cli/commands/ask.py b/vectorless/cli/commands/ask.py index c8e35132..90c227ac 100644 --- a/vectorless/cli/commands/ask.py +++ b/vectorless/cli/commands/ask.py @@ -10,14 +10,14 @@ from vectorless.cli.output import OutputFormat, format_query_result -def _create_session(workspace_dir: str): - """Create a Session from workspace config.""" - from vectorless.session import Session +def _create_engine(workspace_dir: str): + """Create an Engine from workspace config.""" + from vectorless.engine import Engine config_path = os.path.join(workspace_dir, "config.toml") if os.path.exists(config_path): - return Session.from_config_file(config_path) - return Session.from_env() + return Engine.from_config_file(config_path) + return Engine.from_env() # Module-level mutable state for the REPL @@ -53,7 +53,7 @@ def _handle_repl_command( Args: line: Raw input line. - session: Session instance. + session: Engine instance. workspace: Workspace path. 
Returns: @@ -88,7 +88,7 @@ def _handle_repl_command( click.echo("No document target set (querying all documents)") return True elif cmd == ".stats": - click.echo(f"Session statistics:") + click.echo("Engine statistics:") click.echo(f" Queries: {_total_queries}") click.echo(f" LLM calls (from query metrics): {_total_llm_calls}") @@ -145,9 +145,9 @@ def ask_cmd(*, doc_id: Optional[str] = None, verbose: bool = False) -> None: workspace = get_workspace_path() try: - session = _create_session(workspace) + session = _create_engine(workspace) except Exception as e: - raise click.ClickException(f"Failed to create session: {e}") from e + raise click.ClickException(f"Failed to create engine: {e}") from e _print_welcome() @@ -185,9 +185,7 @@ async def _run(): response = asyncio.run(_run()) # Accumulate metrics - for item in response.items: - if item.metrics: - _total_llm_calls += item.metrics.llm_calls + _total_llm_calls += response.metrics.llm_calls output = format_query_result( response, fmt=OutputFormat.TEXT, verbose=_verbose diff --git a/vectorless/cli/commands/info.py b/vectorless/cli/commands/info.py index c6fbced3..29c106d6 100644 --- a/vectorless/cli/commands/info.py +++ b/vectorless/cli/commands/info.py @@ -9,14 +9,14 @@ from vectorless.cli.output import format_json -def _create_session(workspace_dir: str): - """Create a Session from workspace config.""" - from vectorless.session import Session +def _create_engine(workspace_dir: str): + """Create an Engine from workspace config.""" + from vectorless.engine import Engine config_path = os.path.join(workspace_dir, "config.toml") if os.path.exists(config_path): - return Session.from_config_file(config_path) - return Session.from_env() + return Engine.from_config_file(config_path) + return Engine.from_env() def info_cmd(doc_id: str) -> None: @@ -46,9 +46,9 @@ def info_cmd(doc_id: str) -> None: workspace = get_workspace_path() try: - session = _create_session(workspace) + session = _create_engine(workspace) except 
Exception as e: - raise click.ClickException(f"Failed to create session: {e}") from e + raise click.ClickException(f"Failed to create engine: {e}") from e async def _run(): return await session.list_documents() diff --git a/vectorless/cli/commands/list_cmd.py b/vectorless/cli/commands/list_cmd.py index bf7e2301..b7771001 100644 --- a/vectorless/cli/commands/list_cmd.py +++ b/vectorless/cli/commands/list_cmd.py @@ -9,14 +9,14 @@ from vectorless.cli.output import format_documents_table, format_json -def _create_session(workspace_dir: str): - """Create a Session from workspace config.""" - from vectorless.session import Session +def _create_engine(workspace_dir: str): + """Create an Engine from workspace config.""" + from vectorless.engine import Engine config_path = os.path.join(workspace_dir, "config.toml") if os.path.exists(config_path): - return Session.from_config_file(config_path) - return Session.from_env() + return Engine.from_config_file(config_path) + return Engine.from_env() def list_cmd(*, fmt: str = "table") -> None: @@ -34,9 +34,9 @@ def list_cmd(*, fmt: str = "table") -> None: workspace = get_workspace_path() try: - session = _create_session(workspace) + session = _create_engine(workspace) except Exception as e: - raise click.ClickException(f"Failed to create session: {e}") from e + raise click.ClickException(f"Failed to create engine: {e}") from e async def _run(): return await session.list_documents() diff --git a/vectorless/cli/commands/query.py b/vectorless/cli/commands/query.py index 7fe383f6..dd893e3e 100644 --- a/vectorless/cli/commands/query.py +++ b/vectorless/cli/commands/query.py @@ -7,24 +7,23 @@ import click from vectorless.cli.workspace import get_workspace_path -from vectorless.cli.output import OutputFormat, format_query_result, format_json +from vectorless.cli.output import OutputFormat, format_query_result -def _create_session(workspace_dir: str): - """Create a Session from workspace config.""" - from vectorless.session import Session 
+def _create_engine(workspace_dir: str): + """Create an Engine from workspace config.""" + from vectorless.engine import Engine config_path = os.path.join(workspace_dir, "config.toml") if os.path.exists(config_path): - return Session.from_config_file(config_path) - return Session.from_env() + return Engine.from_config_file(config_path) + return Engine.from_env() def query_cmd( question: str, *, doc_ids: tuple[str, ...] = (), - workspace_scope: bool = False, fmt: str = "text", verbose: bool = False, timeout_secs: Optional[int] = None, @@ -34,29 +33,21 @@ def query_cmd( Args: question: Natural-language question. doc_ids: Limit to specific document IDs. - workspace_scope: Query across all documents. fmt: Output format — "text" or "json". verbose: Show Agent navigation steps. timeout_secs: Per-operation timeout in seconds. - - Uses: - Engine.query(QueryContext(question) - .with_doc_ids([...]) or .with_workspace() - .with_timeout_secs(n)) - -> QueryResult """ workspace = get_workspace_path() try: - session = _create_session(workspace) + session = _create_engine(workspace) except Exception as e: - raise click.ClickException(f"Failed to create session: {e}") from e + raise click.ClickException(f"Failed to create engine: {e}") from e async def _run(): return await session.ask( question, doc_ids=list(doc_ids) if doc_ids else None, - workspace_scope=workspace_scope, timeout_secs=timeout_secs, ) @@ -71,13 +62,11 @@ async def _run(): # Show metrics in verbose mode if verbose: - for item in result.items: - if item.metrics: - m = item.metrics - click.echo( - f"\nMetrics ({item.doc_id}): " - f"LLM calls={m.llm_calls}, " - f"rounds={m.rounds_used}, " - f"nodes_visited={m.nodes_visited}, " - f"evidence={m.evidence_count}" - ) + m = result.metrics + click.echo( + f"\nMetrics: " + f"LLM calls={m.llm_calls}, " + f"rounds={m.rounds_used}, " + f"nodes_visited={m.nodes_visited}, " + f"evidence_chars={m.evidence_chars}" + ) diff --git a/vectorless/cli/commands/remove.py 
b/vectorless/cli/commands/remove.py index 219aea05..412218f0 100644 --- a/vectorless/cli/commands/remove.py +++ b/vectorless/cli/commands/remove.py @@ -8,14 +8,14 @@ from vectorless.cli.workspace import get_workspace_path -def _create_session(workspace_dir: str): - """Create a Session from workspace config.""" - from vectorless.session import Session +def _create_engine(workspace_dir: str): + """Create an Engine from workspace config.""" + from vectorless.engine import Engine config_path = os.path.join(workspace_dir, "config.toml") if os.path.exists(config_path): - return Session.from_config_file(config_path) - return Session.from_env() + return Engine.from_config_file(config_path) + return Engine.from_env() def remove_cmd(doc_id: str) -> None: @@ -30,9 +30,9 @@ def remove_cmd(doc_id: str) -> None: workspace = get_workspace_path() try: - session = _create_session(workspace) + session = _create_engine(workspace) except Exception as e: - raise click.ClickException(f"Failed to create session: {e}") from e + raise click.ClickException(f"Failed to create engine: {e}") from e async def _run(): return await session.remove_document(doc_id) diff --git a/vectorless/cli/commands/stats.py b/vectorless/cli/commands/stats.py index fa891516..35ca30fd 100644 --- a/vectorless/cli/commands/stats.py +++ b/vectorless/cli/commands/stats.py @@ -9,14 +9,14 @@ from vectorless.cli.workspace import get_workspace_path, get_data_dir, get_cache_dir -def _create_session(workspace_dir: str): - """Create a Session from workspace config.""" - from vectorless.session import Session +def _create_engine(workspace_dir: str): + """Create an Engine from workspace config.""" + from vectorless.engine import Engine config_path = os.path.join(workspace_dir, "config.toml") if os.path.exists(config_path): - return Session.from_config_file(config_path) - return Session.from_env() + return Engine.from_config_file(config_path) + return Engine.from_env() def _dir_size(path: str) -> int: @@ -59,9 +59,9 @@ def 
stats_cmd() -> None: workspace = get_workspace_path() try: - session = _create_session(workspace) + session = _create_engine(workspace) except Exception as e: - raise click.ClickException(f"Failed to create session: {e}") from e + raise click.ClickException(f"Failed to create engine: {e}") from e async def _run(): documents = await session.list_documents() diff --git a/vectorless/cli/commands/tree.py b/vectorless/cli/commands/tree.py index da12f8eb..bdebecf7 100644 --- a/vectorless/cli/commands/tree.py +++ b/vectorless/cli/commands/tree.py @@ -9,14 +9,14 @@ from vectorless.cli.workspace import get_workspace_path -def _create_session(workspace_dir: str): - """Create a Session from workspace config.""" - from vectorless.session import Session +def _create_engine(workspace_dir: str): + """Create an Engine from workspace config.""" + from vectorless.engine import Engine config_path = os.path.join(workspace_dir, "config.toml") if os.path.exists(config_path): - return Session.from_config_file(config_path) - return Session.from_env() + return Engine.from_config_file(config_path) + return Engine.from_env() def tree_cmd( @@ -46,9 +46,9 @@ def tree_cmd( workspace = get_workspace_path() try: - session = _create_session(workspace) + session = _create_engine(workspace) except Exception as e: - raise click.ClickException(f"Failed to create session: {e}") from e + raise click.ClickException(f"Failed to create engine: {e}") from e async def _run(): graph = await session.get_graph() diff --git a/vectorless/cli/main.py b/vectorless/cli/main.py index f41d0fed..db62cd94 100644 --- a/vectorless/cli/main.py +++ b/vectorless/cli/main.py @@ -94,14 +94,12 @@ def remove(doc_id: str) -> None: @app.command() @click.argument("question") @click.option("--doc", "-d", multiple=True, help="Limit query to specific document IDs.") -@click.option("--workspace-scope", is_flag=True, help="Query across all documents.") @click.option("--format", "fmt", type=click.Choice(["text", "json"]), default="text") 
@click.option("--verbose", "-v", is_flag=True, help="Show Agent navigation steps.") @click.option("--max-tokens", type=int, help="Max result tokens.") def query( question: str, doc: tuple[str, ...], - workspace_scope: bool, fmt: str, verbose: bool, max_tokens: Optional[int], @@ -113,7 +111,6 @@ def query( query_cmd( question, doc_ids=doc, - workspace_scope=workspace_scope, fmt=fmt, verbose=verbose, timeout_secs=max_tokens, diff --git a/vectorless/cli/output.py b/vectorless/cli/output.py index 4c131f10..5969186e 100644 --- a/vectorless/cli/output.py +++ b/vectorless/cli/output.py @@ -104,48 +104,45 @@ def format_query_result( """Format query results for output. Args: - result: QueryResponse or similar with items and failed. + result: Output from Engine.ask(). fmt: Output format. verbose: Show evidence details. """ if fmt == OutputFormat.JSON: - if hasattr(result, "to_dict"): - return format_json(result.to_dict()) - return format_json(result) + data = { + "answer": result.answer, + "confidence": result.confidence, + "evidence": [ + { + "title": e.node_title, + "path": e.source_path, + "content": e.content, + "doc_name": e.doc_name, + } + for e in result.evidence + ], + "metrics": { + "llm_calls": result.metrics.llm_calls, + "rounds_used": result.metrics.rounds_used, + "nodes_visited": result.metrics.nodes_visited, + "evidence_chars": result.metrics.evidence_chars, + }, + } + return format_json(data) lines = [] - items = result.items if hasattr(result, "items") else result.get("items", []) - - for item in items: - content = item.content if hasattr(item, "content") else item.get("content", "") - doc_id = item.doc_id if hasattr(item, "doc_id") else item.get("doc_id", "") - confidence = ( - item.confidence if hasattr(item, "confidence") else item.get("confidence", 0) - ) - - lines.append(f"[{doc_id}] (confidence: {confidence:.2f})") - lines.append(f" {content}") - - if verbose: - evidence = ( - item.evidence if hasattr(item, "evidence") else item.get("evidence", []) - ) 
- if evidence: - lines.append(" Evidence:") - for ev in evidence: - title = ev.title if hasattr(ev, "title") else ev.get("title", "") - path = ev.path if hasattr(ev, "path") else ev.get("path", "") - lines.append(f" - {title} ({path})") - - lines.append("") - - failed = result.failed if hasattr(result, "failed") else result.get("failed", []) - if failed: - lines.append("Failures:") - for f in failed: - source = f.source if hasattr(f, "source") else f.get("source", "") - error = f.error if hasattr(f, "error") else f.get("error", "") - lines.append(f" {source}: {error}") + lines.append(f"(confidence: {result.confidence:.2f})") + lines.append(f" {result.answer}") + + if verbose: + evidence = result.evidence + if evidence: + lines.append(" Evidence:") + for ev in evidence: + doc_label = f" [{ev.doc_name}]" if ev.doc_name else "" + lines.append(f" - {ev.node_title} ({ev.source_path}){doc_label}") + + lines.append("") return "\n".join(lines) diff --git a/vectorless/cli/workspace.py b/vectorless/cli/workspace.py new file mode 100644 index 00000000..b93898c1 --- /dev/null +++ b/vectorless/cli/workspace.py @@ -0,0 +1,173 @@ +"""Workspace management — .vectorless/ directory operations.""" + +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any, Dict, Optional + +import click + +WORKSPACE_DIR = ".vectorless" +CONFIG_FILE = "config.toml" +DATA_DIR = "data" +CACHE_DIR = "cache" + +_DEFAULT_CONFIG = """\ +# Vectorless workspace configuration +# See https://vectorless.dev/docs/configuration + +[llm] +# model = "gpt-4o" +# api_key = "sk-..." +# endpoint = "https://api.openai.com/v1" + +[llm.throttle] +# max_concurrent_requests = 10 +# requests_per_minute = 500 + +[retrieval] +# top_k = 3 +# max_iterations = 10 + +[storage] +# workspace_dir = "~/.vectorless" + +[metrics] +# enabled = true +""" + + +def find_workspace(start: str = ".") -> Optional[str]: + """Find .vectorless/ directory by walking up from start. 
+ + Args: + start: Directory to start searching from. + + Returns: + Absolute path to .vectorless/ if found, else None. + """ + current = Path(start).resolve() + while True: + candidate = current / WORKSPACE_DIR + if candidate.is_dir(): + return str(candidate) + parent = current.parent + if parent == current: + return None + current = parent + + +def init_workspace(target: str = ".") -> str: + """Create .vectorless/ directory structure with default config. + + Args: + target: Parent directory to create workspace in. + + Returns: + Path to created .vectorless/ directory. + + Creates: + target/.vectorless/ + ├── config.toml + ├── data/ + └── cache/ + """ + workspace = Path(target).resolve() / WORKSPACE_DIR + workspace.mkdir(parents=True, exist_ok=True) + (workspace / DATA_DIR).mkdir(exist_ok=True) + (workspace / CACHE_DIR).mkdir(exist_ok=True) + + config_path = workspace / CONFIG_FILE + if not config_path.exists(): + config_path.write_text(_DEFAULT_CONFIG) + + return str(workspace) + + +def get_workspace_path(start: str = ".") -> str: + """Get workspace path or raise. + + Args: + start: Directory to search from. + + Returns: + Absolute path to .vectorless/ directory. + + Raises: + click.ClickException: If workspace not found. + """ + path = find_workspace(start) + if path is None: + raise click.ClickException( + "No .vectorless/ workspace found. Run 'vectorless init' first." + ) + return path + + +def load_config(workspace: str) -> Dict[str, Any]: + """Load configuration from workspace config.toml. + + Args: + workspace: Path to .vectorless/ directory. + + Returns: + Configuration dict. 
+ """ + import sys + + config_path = Path(workspace) / CONFIG_FILE + if not config_path.exists(): + return {} + + if sys.version_info >= (3, 11): + import tomllib + else: + try: + import tomli as tomllib # type: ignore[no-redef] + except ImportError: + # Fallback: parse as plain text (comments-only files) + return {} + + with open(config_path, "rb") as f: + return tomllib.load(f) + + +def save_config(workspace: str, config: Dict[str, Any]) -> None: + """Save configuration to workspace config.toml. + + Args: + workspace: Path to .vectorless/ directory. + config: Configuration dict to save. + """ + config_path = Path(workspace) / CONFIG_FILE + lines: list[str] = [] + + def _write_section(key: str, value: Any, prefix: str = "") -> None: + section = f"{prefix}{key}" if not prefix else f"{prefix}.{key}" + if isinstance(value, dict): + lines.append(f"\n[{section}]") + for k, v in value.items(): + _write_section(k, v, section) + elif isinstance(value, str): + lines.append(f'{key} = "{value}"') + elif isinstance(value, bool): + lines.append(f"{key} = {'true' if value else 'false'}") + elif isinstance(value, (int, float)): + lines.append(f"{key} = {value}") + + for k, v in config.items(): + _write_section(k, v) + + config_path.write_text("\n".join(lines) + "\n") + + +def get_data_dir(workspace: str) -> str: + """Get data directory path within workspace.""" + return str(Path(workspace) / DATA_DIR) + + +def get_cache_dir(workspace: str) -> str: + """Get cache directory path within workspace.""" + return str(Path(workspace) / CACHE_DIR) diff --git a/vectorless/config/models.py b/vectorless/config/models.py index 57da548c..eafc2a26 100644 --- a/vectorless/config/models.py +++ b/vectorless/config/models.py @@ -6,7 +6,7 @@ from pydantic import BaseModel, Field -from vectorless._core import Config as RustConfig +from vectorless._internal._core import Config as RustConfig class ThrottleConfig(BaseModel): diff --git a/vectorless/engine.py b/vectorless/engine.py new file mode 100644 
index 00000000..fd681a33 --- /dev/null +++ b/vectorless/engine.py @@ -0,0 +1,448 @@ +"""High-level Vectorless Engine API. + +``Engine`` is the single recommended entry point for all operations. +It wraps the Rust compile layer with Python strategy for retrieval: +typed configuration, event callbacks, flexible input methods, and batch operations. +""" + +from __future__ import annotations + +import asyncio +import logging +from pathlib import Path +from typing import Any, Callable, List, Optional, Union + +from vectorless._internal._core import Engine as RustEngine +from vectorless.ask.dispatcher import dispatch +from vectorless.ask.types import DocCard, Output, Specified, Workspace +from vectorless.config import EngineConfig, load_config, load_config_from_env, load_config_from_file +from vectorless.events import ( + EventEmitter, + IndexEventData, + IndexEventType, + QueryEventData, + QueryEventType, +) +from vectorless.llm_client import LLMClient +from vectorless.streaming import StreamingQueryResult +from vectorless.types.graph import DocumentGraphWrapper +from vectorless.types.results import ( + IndexResultWrapper, +) + +logger = logging.getLogger(__name__) + + +class Engine: + """High-level Vectorless engine. + + compile (ingest) runs in Rust; ask (retrieval) runs in Python. + + Configuration precedence: constructor args > env vars > config file > defaults. 
+ + Usage:: + + from vectorless import Engine + + engine = Engine(api_key="sk-...", model="gpt-4o") + result = await engine.compile(path="./report.pdf") + answer = await engine.ask("What is the Q4 revenue?", doc_ids=[result.doc_id]) + print(answer.answer) + + Or from environment variables:: + + # VECTORLESS_API_KEY, VECTORLESS_MODEL set in env + engine = Engine.from_env() + """ + + def __init__( + self, + api_key: Optional[str] = None, + model: Optional[str] = None, + endpoint: Optional[str] = None, + config: Optional[EngineConfig] = None, + config_file: Optional[Union[str, Path]] = None, + events: Optional[EventEmitter] = None, + ) -> None: + self._events = events or EventEmitter() + + # Resolve config: constructor > env > file > defaults + if config is not None: + self._config = config + else: + self._config = self._resolve_config(api_key, model, endpoint, config_file) + + # Build Rust engine (for compile / document management) + rust_config = self._config.to_rust_config() + self._rust = RustEngine( + api_key=self._config.llm.api_key, + model=self._config.llm.model or None, + endpoint=self._config.llm.endpoint or None, + config=rust_config, + ) + + # Build Python LLM client (for strategy layer) + self._llm = LLMClient( + api_key=self._config.llm.api_key, + model=self._config.llm.model, + endpoint=self._config.llm.endpoint or None, + ) + + @classmethod + def from_env(cls, events: Optional[EventEmitter] = None) -> Engine: + """Create an Engine from environment variables only.""" + config = load_config_from_env() + return cls(config=config, events=events) + + @classmethod + def from_config_file( + cls, + path: Union[str, Path], + events: Optional[EventEmitter] = None, + ) -> Engine: + """Create an Engine from a TOML config file.""" + config = load_config_from_file(Path(path)) + return cls(config=config, events=events) + + def _resolve_config( + self, + api_key: Optional[str], + model: Optional[str], + endpoint: Optional[str], + config_file: Optional[Union[str, 
Path]], + ) -> EngineConfig: + overrides: dict[str, Any] = {} + llm_overrides: dict[str, Any] = {} + if api_key is not None: + llm_overrides["api_key"] = api_key + if model is not None: + llm_overrides["model"] = model + if endpoint is not None: + llm_overrides["endpoint"] = endpoint + if llm_overrides: + overrides["llm"] = llm_overrides + + return load_config( + config_file=Path(config_file) if config_file else None, + overrides=overrides if overrides else None, + ) + + # ── Compiling (Rust compile pipeline) ─────────────────────── + + async def compile( + self, + path: Optional[Union[str, Path]] = None, + paths: Optional[List[Union[str, Path]]] = None, + directory: Optional[Union[str, Path]] = None, + content: Optional[str] = None, + bytes_data: Optional[bytes] = None, + format: str = "markdown", + name: Optional[str] = None, + mode: str = "default", + force: bool = False, + ) -> IndexResultWrapper: + """Compile a document from various sources. + + Exactly one source must be provided: path, paths, directory, + content, or bytes_data. 
+ """ + sources_provided = sum( + x is not None for x in [path, paths, directory, content, bytes_data] + ) + if sources_provided != 1: + raise ValueError( + "Provide exactly one source: path, paths, directory, content, or bytes_data" + ) + + # For single file, delegate to Rust ingest + if path is not None: + source_desc = str(path) + self._events.emit_index( + IndexEventData(event_type=IndexEventType.STARTED, path=source_desc) + ) + doc_info = await self._rust.ingest(str(path)) + self._events.emit_index( + IndexEventData( + event_type=IndexEventType.COMPLETE, + doc_id=doc_info.doc_id, + message=f"Indexed {doc_info.doc_id}", + ) + ) + return IndexResultWrapper.from_doc_info(doc_info) + + # For multiple files, index them sequentially + if paths is not None: + return await self.compile_batch( + paths, mode="force" if force else mode, + ) + + if directory is not None: + # Scan directory for supported files + dir_path = Path(directory) + extensions = {".md", ".pdf", ".markdown"} + file_paths = [ + str(f) for f in dir_path.rglob("*") + if f.suffix.lower() in extensions and f.is_file() + ] + if not file_paths: + raise ValueError(f"No supported documents found in {directory}") + return await self.compile_batch(file_paths, mode="force" if force else mode) + + if content is not None: + # Write content to a temp file and ingest + import tempfile + suffix = ".md" if format == "markdown" else f".{format}" + with tempfile.NamedTemporaryFile(mode="w", suffix=suffix, delete=False) as f: + f.write(content) + tmp_path = f.name + try: + doc_info = await self._rust.ingest(tmp_path) + return IndexResultWrapper.from_doc_info(doc_info) + finally: + import os + os.unlink(tmp_path) + + if bytes_data is not None: + import tempfile + suffix = ".md" if format == "markdown" else f".{format}" + with tempfile.NamedTemporaryFile(mode="wb", suffix=suffix, delete=False) as f: + f.write(bytes_data) + tmp_path = f.name + try: + doc_info = await self._rust.ingest(tmp_path) + return 
IndexResultWrapper.from_doc_info(doc_info) + finally: + import os + os.unlink(tmp_path) + + raise ValueError("No source provided") + + async def compile_batch( + self, + paths: List[Union[str, Path]], + *, + mode: str = "default", + jobs: int = 1, + force: bool = False, + progress: bool = True, + ) -> IndexResultWrapper: + """Compile multiple files with optional concurrency. + + Args: + paths: List of file paths to index. + mode: Indexing mode ("default", "force", "incremental"). + jobs: Max concurrent indexing jobs. + force: Force re-index existing documents. + progress: Emit progress events. + """ + semaphore = asyncio.Semaphore(jobs) + + async def _index_one(p: Union[str, Path]) -> object: + async with semaphore: + self._events.emit_index( + IndexEventData(event_type=IndexEventType.STARTED, path=str(p)) + ) + doc_info = await self._rust.ingest(str(p)) + if progress: + self._events.emit_index( + IndexEventData( + event_type=IndexEventType.COMPLETE, + path=str(p), + doc_id=doc_info.doc_id, + ) + ) + return doc_info + + results = await asyncio.gather(*[_index_one(p) for p in paths]) + return IndexResultWrapper.from_doc_infos(list(results)) + + # ── Querying (Python strategy layer) ──────────────────────── + + async def ask( + self, + question: str, + *, + doc_ids: Optional[List[str]] = None, + timeout_secs: Optional[int] = None, + ) -> Output: + """Ask a question and get results with source attribution. + + Uses the Python strategy layer: query understanding → orchestrator → workers → rerank. + + Args: + question: Natural language query. + doc_ids: Limit query to specific document IDs. If None, queries all documents. + timeout_secs: Per-operation timeout. 
+ """ + self._events.emit_query( + QueryEventData(event_type=QueryEventType.STARTED, query=question) + ) + + try: + result = await self._ask_python(question, doc_ids) + except Exception as e: + self._events.emit_query( + QueryEventData( + event_type=QueryEventType.ERROR, + query=question, + message=str(e), + ) + ) + raise + + self._events.emit_query( + QueryEventData( + event_type=QueryEventType.COMPLETE, + query=question, + total_results=len(result.evidence), + ) + ) + + return result + + async def query_stream( + self, + question: str, + *, + doc_ids: Optional[List[str]] = None, + timeout_secs: Optional[int] = None, + ) -> StreamingQueryResult: + """Stream query progress as an async iterator. + + Yields real-time events from the Python strategy pipeline. + Terminal events are ``'completed'`` (with results) or ``'error'``. + + Usage:: + + stream = await engine.query_stream("What is the revenue?") + async for event in stream: + print(event["type"], event) + result = stream.result + """ + return StreamingQueryResult.from_engine(self, question, doc_ids) + + # ── Python strategy implementation ────────────────────────── + + async def _ask_python( + self, + question: str, + doc_ids: Optional[List[str]], + event_queue: Optional[asyncio.Queue] = None, + ) -> Output: + """Run the full Python strategy: dispatch → Output. + + Uses dispatcher as the unified entry point. The dispatcher handles + query understanding, orchestrator execution, and rerank internally. + """ + emit = event_queue.put if event_queue else lambda _: asyncio.ensure_future(asyncio.sleep(0)) + + # 1. Resolve target documents + all_doc_infos = await self._rust.list_documents() + + if doc_ids is not None: + target_ids = doc_ids + else: + target_ids = [d.doc_id for d in all_doc_infos] + + if not target_ids: + return Output(answer="") + + # 2. 
Build DocCards for orchestrator analysis + info_map = {d.doc_id: d for d in all_doc_infos} + target_infos = [info_map[did] for did in target_ids if did in info_map] + + if not target_infos: + raise DocumentNotFoundError( + f"None of the requested doc_ids found: {doc_ids}" + ) + + doc_cards = [] + for info in target_infos: + concepts = [] + if info.concepts: + concepts = [c.name for c in info.concepts] + doc_cards.append(DocCard( + doc_id=info.doc_id, + name=info.name, + summary=info.summary or "", + section_count=info.section_count, + concepts=concepts, + )) + + # 3. Determine scope: doc_ids specified → Specified, else → Workspace + if doc_ids is not None: + scope = Specified(docs=doc_cards) + else: + scope = Workspace(docs=doc_cards) + + # 4. Dispatch (understand + orchestrator + rerank) + return await dispatch( + query=question, + scope=scope, + llm=self._llm, + doc_loader=self._load_document, + event_callback=emit if event_queue else None, + ) + + async def _load_document(self, doc_id: str): + """Load a navigable Document from the Rust engine.""" + return await self._rust.load_document(doc_id) + + # ── Document Management (Rust) ────────────────────────────── + + async def list_documents(self) -> list: + """List all indexed documents.""" + return await self._rust.list_documents() + + async def remove_document(self, doc_id: str) -> bool: + """Remove a document by ID.""" + await self._rust.forget(doc_id) + return True + + async def document_exists(self, doc_id: str) -> bool: + """Check if a document exists.""" + return await self._rust.exists(doc_id) + + async def clear_all(self) -> int: + """Remove all indexed documents. 
Returns count removed.""" + return await self._rust.clear() + + # ── Graph (Rust) ──────────────────────────────────────────── + + async def get_graph(self) -> Optional[DocumentGraphWrapper]: + """Get the cross-document relationship graph.""" + graph = await self._rust.get_graph() + if graph is None: + return None + return DocumentGraphWrapper.from_rust(graph) + + # ── Metrics (Rust) ────────────────────────────────────────── + + def metrics_report(self) -> Any: + """Get a comprehensive metrics report.""" + return self._rust.metrics_report() + + # ── Context Manager ───────────────────────────────────────── + + async def __aenter__(self) -> Engine: + return self + + async def __aexit__(self, *args: Any) -> None: + pass + + def __repr__(self) -> str: + model = self._config.llm.model or "unknown" + return f"Engine(model={model!r})" + + +# --------------------------------------------------------------------------- +# Exceptions +# --------------------------------------------------------------------------- + + +class DocumentNotFoundError(Exception): + """Raised when a requested document ID is not found in the workspace.""" + + +class EmptyWorkspaceError(Exception): + """Raised when no documents are indexed in the workspace.""" diff --git a/vectorless/events.py b/vectorless/events.py index 6d764993..6d386769 100644 --- a/vectorless/events.py +++ b/vectorless/events.py @@ -71,7 +71,7 @@ class EventEmitter: Usage:: - from vectorless import Session, EventEmitter + from vectorless import Engine, EventEmitter events = EventEmitter() @@ -79,7 +79,7 @@ class EventEmitter: def on_query(event): print(f"Query: {event.query}") - session = Session(api_key="sk-...", model="gpt-4o", events=events) + engine = Engine(api_key="sk-...", model="gpt-4o", events=events) """ def __init__(self) -> None: diff --git a/vectorless/jupyter.py b/vectorless/jupyter.py index 2d63a92e..2ae81cd7 100644 --- a/vectorless/jupyter.py +++ b/vectorless/jupyter.py @@ -5,7 +5,7 @@ import html as html_module 
from typing import Any, List, Optional -from vectorless.types.results import QueryResponse, QueryResult, Evidence +from vectorless.ask.types import Output class QueryResultDisplay: @@ -15,63 +15,60 @@ class QueryResultDisplay: for automatic rendering. """ - def __init__(self, result: QueryResponse) -> None: + def __init__(self, result: Output) -> None: self._result = result def _repr_html_(self) -> str: - rows = [] - for item in self._result.items: - escaped_content = html_module.escape(item.content[:500]) - confidence_bar = _confidence_bar(item.confidence) - evidence_html = _evidence_list_html(item.evidence) - rows.append( - f"<div style='margin-bottom:16px; padding:12px; " - f"border:1px solid #e0e0e0; border-radius:4px;'>" - f"<div style='display:flex; justify-content:space-between; " - f"align-items:center; margin-bottom:8px;'>" - f"<code>{html_module.escape(item.doc_id)}</code>" - f"{confidence_bar}" - f"</div>" - f"<p style='margin:0;'>{escaped_content}</p>" - f"{evidence_html}" - f"</div>" - ) - - failed_html = "" - if self._result.has_failures(): - failed_items = [] - for f in self._result.failed: - failed_items.append( - f"<li>{html_module.escape(f.source)}: " - f"{html_module.escape(f.error)}</li>" - ) - failed_html = ( - f"<div style='color:red; margin-top:8px;'>" - f"<strong>Failures:</strong><ul>{''.join(failed_items)}</ul></div>" - ) + result = self._result + escaped_answer = html_module.escape(result.answer[:500]) + confidence_bar = _confidence_bar(result.confidence) + evidence_html = _evidence_list_html(result.evidence) return ( f"<div style='font-family:sans-serif;'>" - f"<h4>Results ({len(self._result.items)})</h4>" - f"{''.join(rows)}" - f"{failed_html}" + f"<div style='display:flex; justify-content:space-between; " + f"align-items:center; margin-bottom:8px;'>" + f"<h4>Result</h4>{confidence_bar}</div>" + f"<p style='margin:0;'>{escaped_answer}</p>" + f"{evidence_html}" f"</div>" ) def _repr_markdown_(self) -> str: - lines = [f"## Results 
({len(self._result.items)})\n"] - for item in self._result.items: - lines.append(f"### {item.doc_id} (confidence: {item.confidence:.2f})\n") - lines.append(f"{item.content}\n") - if item.evidence: - lines.append("**Evidence:**\n") - for ev in item.evidence: - lines.append(f"- **{ev.title}** ({ev.path})") + result = self._result + lines = [ + f"## Result (confidence: {result.confidence:.2f})\n", + f"{result.answer}\n", + ] + if result.evidence: + lines.append("**Evidence:**\n") + for ev in result.evidence: + doc_label = f" [{ev.doc_name}]" if ev.doc_name else "" + lines.append(f"- **{ev.node_title}** ({ev.source_path}){doc_label}") lines.append("") return "\n".join(lines) def _repr_json_(self) -> dict: - return self._result.to_dict() + result = self._result + return { + "answer": result.answer, + "confidence": result.confidence, + "evidence": [ + { + "title": e.node_title, + "path": e.source_path, + "content": e.content, + "doc_name": e.doc_name, + } + for e in result.evidence + ], + "metrics": { + "llm_calls": result.metrics.llm_calls, + "rounds_used": result.metrics.rounds_used, + "nodes_visited": result.metrics.nodes_visited, + "evidence_chars": result.metrics.evidence_chars, + }, + } class DocumentGraphDisplay: @@ -126,15 +123,15 @@ def _confidence_bar(confidence: float) -> str: ) -def _evidence_list_html(evidence: List[Evidence]) -> str: +def _evidence_list_html(evidence: list) -> str: """Generate HTML for evidence items.""" if not evidence: return "" items = [] for ev in evidence[:5]: items.append( - f"<li><strong>{html_module.escape(ev.title)}</strong> " - f"<code>{html_module.escape(ev.path)}</code></li>" + f"<li><strong>{html_module.escape(ev.node_title)}</strong> " + f"<code>{html_module.escape(ev.source_path)}</code></li>" ) extra = f" <em>(+{len(evidence) - 5} more)</em>" if len(evidence) > 5 else "" return f"<ul style='margin:8px 0 0 0; font-size:0.9em;'>{''.join(items)}{extra}</ul>" diff --git a/vectorless/llm_client.py b/vectorless/llm_client.py new 
file mode 100644 index 00000000..4b99b8d0 --- /dev/null +++ b/vectorless/llm_client.py @@ -0,0 +1,297 @@ +"""Async LLM client for the Python strategy layer. + +Uses litellm for multi-provider support (OpenAI, Anthropic, DeepSeek, etc.) +and instructor for structured output validation. + +Features: +- Unified interface via litellm (100+ providers) +- Structured JSON output via instructor + Pydantic +- Automatic retry with feedback on validation failure +- Per-request timeout +- In-memory response cache (optional, per-session dedup) +""" + +from __future__ import annotations + +import hashlib +import json +import logging +from typing import Any, Optional, Type, TypeVar + +import litellm +from pydantic import BaseModel + +logger = logging.getLogger(__name__) + +T = TypeVar("T", bound=BaseModel) + +# --------------------------------------------------------------------------- +# Defaults +# --------------------------------------------------------------------------- + +DEFAULT_MAX_RETRIES = 2 +DEFAULT_TIMEOUT = 120.0 # seconds + +# --------------------------------------------------------------------------- +# LLMClient +# --------------------------------------------------------------------------- + + +class LLMClient: + """Async LLM client backed by litellm. + + Supports any provider litellm supports (OpenAI, Anthropic, DeepSeek, etc.) + via model prefix conventions (e.g. "openai/gpt-4o", "anthropic/claude-sonnet-4"). 
+ + Usage:: + + llm = LLMClient(api_key="sk-...", model="gpt-4o") + text = await llm.complete("You are a helpful assistant", "What is 2+2?") + + # Structured output + class MyResponse(BaseModel): + answer: str + confidence: float + result = await llm.complete_structured("...", "...", MyResponse) + print(result.answer) + """ + + def __init__( + self, + api_key: str, + model: str, + endpoint: Optional[str] = None, + *, + max_retries: int = DEFAULT_MAX_RETRIES, + timeout: float = DEFAULT_TIMEOUT, + enable_cache: bool = True, + ) -> None: + self._model = model + self._api_key = api_key + self._endpoint = endpoint + self._max_retries = max_retries + self._timeout = timeout + self._cache: dict[str, str] = {} if enable_cache else {} + self._cache_enabled = enable_cache + + # Configure litellm defaults + if endpoint: + litellm.api_base = endpoint + + @property + def model(self) -> str: + return self._model + + # ── Core completion ────────────────────────────────────────── + + async def complete( + self, + system: str, + user: str, + *, + temperature: float = 0.0, + timeout: Optional[float] = None, + ) -> str: + """Send a completion request and return the assistant message text. + + Args: + system: System prompt. + user: User message. + temperature: Sampling temperature. + timeout: Per-request timeout in seconds (overrides default). + + Returns: + The assistant's text response. + """ + cache_key = self._cache_key(system, user, temperature) + if self._cache_enabled and cache_key in self._cache: + return self._cache[cache_key] + + response = await self._call_with_retry( + system=system, + user=user, + temperature=temperature, + timeout=timeout or self._timeout, + ) + + if self._cache_enabled: + self._cache[cache_key] = response + + return response + + async def complete_json( + self, + system: str, + user: str, + *, + temperature: float = 0.0, + timeout: Optional[float] = None, + ) -> dict[str, Any]: + """Send a completion request and parse the response as JSON. 
+ + Falls back to regex extraction if the response is not valid JSON. + """ + text = await self.complete(system, user, temperature=temperature, timeout=timeout) + return _extract_json(text) + + async def complete_structured( + self, + system: str, + user: str, + response_model: Type[T], + *, + max_retries: Optional[int] = None, + temperature: float = 0.0, + timeout: Optional[float] = None, + ) -> T: + """Send a completion request with structured output via instructor. + + Uses instructor's `from_litellm` to get typed Pydantic responses. + On validation failure, automatically retries with error feedback. + + Args: + system: System prompt. + user: User message. + response_model: Pydantic model class for the expected response. + max_retries: Max retries on validation failure (overrides default). + temperature: Sampling temperature. + timeout: Per-request timeout in seconds. + + Returns: + Validated instance of response_model. + """ + import instructor + + client = instructor.from_litellm(litellm.acompletion) + retries = max_retries if max_retries is not None else self._max_retries + + messages = [ + {"role": "system", "content": system}, + {"role": "user", "content": user}, + ] + + return await client.chat.completions.create( + model=self._model, + messages=messages, + response_model=response_model, + max_retries=retries, + temperature=temperature, + timeout=timeout or self._timeout, + api_key=self._api_key, + api_base=self._endpoint, + ) + + async def complete_with_messages( + self, + messages: list[dict[str, str]], + *, + temperature: float = 0.0, + timeout: Optional[float] = None, + ) -> str: + """Send a completion request with pre-built messages.""" + response = await litellm.acompletion( + model=self._model, + messages=messages, + temperature=temperature, + timeout=timeout or self._timeout, + api_key=self._api_key, + api_base=self._endpoint, + ) + return response.choices[0].message.content or "" + + # ── Internal ───────────────────────────────────────────────── 
+ + async def _call_with_retry( + self, + system: str, + user: str, + temperature: float, + timeout: float, + ) -> str: + """Call litellm.acompletion with retry on transient errors.""" + messages = [ + {"role": "system", "content": system}, + {"role": "user", "content": user}, + ] + + last_error: Optional[Exception] = None + for attempt in range(1 + self._max_retries): + try: + response = await litellm.acompletion( + model=self._model, + messages=messages, + temperature=temperature, + timeout=timeout, + api_key=self._api_key, + api_base=self._endpoint, + ) + return response.choices[0].message.content or "" + except litellm.RateLimitError as e: + last_error = e + logger.warning("LLM rate limit hit, attempt %d/%d: %s", attempt + 1, self._max_retries + 1, e) + if attempt < self._max_retries: + import asyncio + await asyncio.sleep(2 ** attempt) + except litellm.Timeout as e: + last_error = e + logger.warning("LLM timeout, attempt %d/%d", attempt + 1, self._max_retries + 1) + except litellm.APIConnectionError as e: + last_error = e + logger.warning("LLM connection error, attempt %d/%d: %s", attempt + 1, self._max_retries + 1, e) + + raise LLMError(f"LLM call failed after {self._max_retries + 1} attempts: {last_error}") from last_error + + def _cache_key(self, system: str, user: str, temperature: float) -> str: + raw = f"{self._model}:{temperature}:{system}|||{user}" + return hashlib.sha256(raw.encode()).hexdigest() + + def clear_cache(self) -> None: + """Clear the in-memory response cache.""" + if self._cache_enabled: + self._cache.clear() + + +# --------------------------------------------------------------------------- +# Exceptions +# --------------------------------------------------------------------------- + + +class LLMError(Exception): + """Raised when an LLM call fails after all retries.""" + + +# --------------------------------------------------------------------------- +# JSON extraction fallback +# 
--------------------------------------------------------------------------- + + +def _extract_json(text: str) -> dict[str, Any]: + """Extract a JSON object from LLM output. + + Handles: + - Plain JSON + - JSON wrapped in ```json ... ``` code blocks + - JSON with leading/trailing text + """ + import re + + match = re.search(r"```(?:json)?\s*\n?(.*?)```", text, re.DOTALL) + if match: + text = match.group(1).strip() + + start = text.find("{") + if start != -1: + depth = 0 + for i in range(start, len(text)): + if text[i] == "{": + depth += 1 + elif text[i] == "}": + depth -= 1 + if depth == 0: + candidate = text[start : i + 1] + try: + return json.loads(candidate) + except json.JSONDecodeError: + break + + return json.loads(text.strip()) diff --git a/vectorless/rerank/__init__.py b/vectorless/rerank/__init__.py new file mode 100644 index 00000000..43c302bf --- /dev/null +++ b/vectorless/rerank/__init__.py @@ -0,0 +1,12 @@ +"""Evidence reranking and answer synthesis.""" + +from vectorless.rerank.quality import filter_by_quality +from vectorless.rerank.synthesize import RerankOutput, dedup, format_answer, process + +__all__ = [ + "RerankOutput", + "dedup", + "filter_by_quality", + "format_answer", + "process", +] diff --git a/vectorless/rerank/quality.py b/vectorless/rerank/quality.py new file mode 100644 index 00000000..3b8247ac --- /dev/null +++ b/vectorless/rerank/quality.py @@ -0,0 +1,152 @@ +"""LLM-based evidence quality filtering. + +Evaluates whether each evidence item actually answers the question. +Low-quality or irrelevant evidence is filtered out before synthesis. 
+""" + +from __future__ import annotations + +import json +import logging +from typing import Optional + +from vectorless.ask.types import Evidence +from vectorless.llm_client import LLMClient +from vectorless.ask.plan import QueryIntent + +logger = logging.getLogger(__name__) + +# Minimum evidence count after quality filter — keep at least this many +MIN_KEEP_COUNT = 1 + + +async def filter_by_quality( + evidence: list[Evidence], + query: str, + intent: QueryIntent, + llm: LLMClient, + *, + confidence: float = 0.0, + min_relevance: float = 0.4, +) -> list[Evidence]: + """Filter evidence by LLM-judged relevance to the query. + + For small evidence sets (≤3), skip LLM evaluation — all evidence is kept. + For larger sets, batch-evaluate relevance and filter below threshold. + + Args: + evidence: Evidence items from workers. + query: The original question. + intent: Classified query intent. + llm: LLM client for quality evaluation. + confidence: Orchestrator confidence (used to adjust threshold). + min_relevance: Minimum relevance score (0.0-1.0) to keep. + + Returns: + Filtered evidence list. 
+ """ + if not evidence: + return [] + + # Skip quality filter for small evidence sets — trust the worker + if len(evidence) <= 3: + return evidence + + system = _quality_system_prompt(intent) + user = _quality_user_prompt(query, evidence) + + try: + result = await llm.complete_json(system, user, temperature=0.0) + except Exception as e: + logger.warning("Quality filter LLM call failed, keeping all evidence: %s", e) + return evidence + + scores = _parse_scores(result, len(evidence)) + if not scores: + return evidence + + # Filter by threshold + kept = [ + ev for i, ev in enumerate(evidence) + if scores.get(i, 0.5) >= min_relevance + ] + + # Ensure we keep at least MIN_KEEP_COUNT + if not kept and evidence: + # Keep the highest-scoring evidence + best_idx = max(scores, key=scores.get) if scores else 0 + kept = [evidence[best_idx]] + + logger.info( + "Quality filter: %d/%d evidence kept (threshold=%.2f)", + len(kept), len(evidence), min_relevance, + ) + + return kept + + +# --------------------------------------------------------------------------- +# Prompt construction +# --------------------------------------------------------------------------- + + +def _quality_system_prompt(intent: QueryIntent) -> str: + intent_desc = { + QueryIntent.FACTUAL: "a factual question seeking specific information", + QueryIntent.ANALYTICAL: "an analytical question requiring reasoning across multiple sections", + QueryIntent.NAVIGATIONAL: "a navigational question seeking where to find information", + QueryIntent.SUMMARY: "a summary question seeking a broad overview", + }.get(intent, "a question") + + return f"""You are an evidence quality evaluator. The user asked {intent_desc}. 
+ +For each evidence item, rate its relevance to the question on a scale of 0.0 to 1.0: +- 1.0: Directly and completely answers the question +- 0.8: Contains relevant information that partially answers the question +- 0.6: Tangentially related but does not directly address the question +- 0.4: Mentions related concepts but is not useful for answering +- 0.2: Barely related or only provides background context +- 0.0: Completely irrelevant + +Respond with a JSON object: {{"scores": [0.8, 0.3, ...]}} +One score per evidence item, in order. Scores array length must match evidence count.""" + + +def _quality_user_prompt(query: str, evidence: list[Evidence]) -> str: + items = [] + for i, ev in enumerate(evidence): + # Truncate long evidence for the prompt + content = ev.content + if len(content) > 300: + content = content[:300] + "..." + items.append(f"[{i}] {ev.node_title}\n{content}") + + return f"""Question: {query} + +Evidence items: +{chr(10).join(items)} + +Rate the relevance of each evidence item to the question.""" + + +# --------------------------------------------------------------------------- +# Response parsing +# --------------------------------------------------------------------------- + + +def _parse_scores(response: dict, expected_count: int) -> dict[int, float]: + """Parse relevance scores from LLM response.""" + scores_raw = response.get("scores", []) + if not isinstance(scores_raw, list): + return {} + + scores = {} + for i, s in enumerate(scores_raw): + if i >= expected_count: + break + try: + scores[i] = float(s) + except (ValueError, TypeError): + scores[i] = 0.5 # default mid-score on parse failure + + return scores diff --git a/vectorless/rerank/synthesize.py b/vectorless/rerank/synthesize.py new file mode 100644 index 00000000..72279aec --- /dev/null +++ b/vectorless/rerank/synthesize.py @@ -0,0 +1,164 @@ +"""Evidence deduplication and answer formatting. + +Mirrors vectorless-core/vectorless-rerank/src/. +Dedup is pure compute (no LLM). 
Answer formatting is intent-aware. + +The Rust rerank principle: "Find what you find, return what you find." +No LLM synthesis — the evidence IS the answer. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +from vectorless.ask.types import Evidence +from vectorless.ask.plan import QueryIntent + + +# --------------------------------------------------------------------------- +# Deduplication (compute — mirrors vectorless-rerank/src/dedup.rs) +# --------------------------------------------------------------------------- + +MIN_EVIDENCE_CHARS = 50 +SIMILARITY_THRESHOLD = 0.8 + + +def dedup(evidence: list[Evidence]) -> list[Evidence]: + """Deduplicate evidence in three stages: + 1. Quality filter — remove evidence with < 50 chars + 2. Source dedup — keep first per unique (doc_name, source_path) + 3. Content similarity — remove near-duplicates via Jaccard similarity + """ + # Stage 1: quality filter + filtered = [e for e in evidence if len(e.content.strip()) >= MIN_EVIDENCE_CHARS] + + # Stage 2: source dedup — mirrors Rust dedup key = "doc_name:source_path" + seen_keys: set[str] = set() + stage2: list[Evidence] = [] + for e in filtered: + doc_key = e.doc_name or "_unknown" + key = f"{doc_key}:{e.source_path}" + if key not in seen_keys: + seen_keys.add(key) + stage2.append(e) + + # Stage 3: content similarity (Jaccard) + result: list[Evidence] = [] + for e in stage2: + is_duplicate = False + for existing in result: + if _jaccard_similarity(e.content, existing.content) >= SIMILARITY_THRESHOLD: + is_duplicate = True + break + if not is_duplicate: + result.append(e) + + return result + + +def _jaccard_similarity(a: str, b: str) -> float: + """Compute Jaccard similarity between two strings (word-level).""" + words_a = set(a.lower().split()) + words_b = set(b.lower().split()) + if not words_a and not words_b: + return 1.0 + if not words_a or not words_b: + return 0.0 + intersection = words_a & words_b + union = words_a | words_b + return 
len(intersection) / len(union) + + +# --------------------------------------------------------------------------- +# Formatting (intent-aware — mirrors rerank/src/lib.rs) +# --------------------------------------------------------------------------- + +def format_answer( + evidence: list[Evidence], + intent: QueryIntent = QueryIntent.FACTUAL, +) -> str: + """Format evidence into an answer string based on query intent. + + Mirrors Rust rerank process() — no LLM, just formatting. + """ + if not evidence: + return "" + + if intent == QueryIntent.NAVIGATIONAL: + return _format_locations(evidence) + + return _format_evidence_as_answer(evidence) + + +def _format_evidence_as_answer(evidence: list[Evidence]) -> str: + """Format collected evidence directly as the answer. + + Mirrors Rust format_evidence_as_answer — includes doc_name attribution. + """ + parts: list[str] = [] + + for e in evidence: + doc = e.doc_name or "" + if doc: + parts.append(f"[{e.node_title} — {doc}]\n{e.content}") + else: + parts.append(f"[{e.node_title}]\n{e.content}") + + return "\n\n".join(parts) + + +def _format_locations(evidence: list[Evidence]) -> str: + """Format evidence as location references (for navigational queries). + + Mirrors Rust format_locations. + """ + if not evidence: + return "No matching locations found." + result = "Found at:\n" + for e in evidence: + doc = e.doc_name or "unknown" + result += f"- **{e.node_title}** in {doc} at {e.source_path}\n" + return result + + +# --------------------------------------------------------------------------- +# Rerank output — mirrors rerank::types::RerankOutput +# --------------------------------------------------------------------------- + +@dataclass +class RerankOutput: + """Output from the rerank pipeline. + + Mirrors Rust RerankOutput: answer + llm_calls + confidence. + Evidence is also included for the Python Orchestrator to assemble Output. 
+ """ + + answer: str + evidence: list[Evidence] + confidence: float + llm_calls: int = 0 # Always 0 for rerank — no LLM calls + + +def process( + evidence: list[Evidence], + intent: QueryIntent = QueryIntent.FACTUAL, + confidence: float = 0.0, +) -> RerankOutput: + """Run the rerank pipeline: dedup → format. + + No LLM calls — pure compute and formatting. + Mirrors Rust rerank::process(). + """ + deduped = dedup(evidence) + + if not deduped: + return RerankOutput(answer="", evidence=[], confidence=0.0, llm_calls=0) + + answer = format_answer(deduped, intent) + + return RerankOutput( + answer=answer, + evidence=deduped, + confidence=confidence, + llm_calls=0, + ) diff --git a/vectorless/session.py b/vectorless/session.py deleted file mode 100644 index 0cbdc72c..00000000 --- a/vectorless/session.py +++ /dev/null @@ -1,354 +0,0 @@ -"""High-level Vectorless Session API. - -``Session`` is the single recommended entry point for all operations. -It wraps the Rust Engine with Pythonic ergonomics: typed configuration, -event callbacks, flexible input methods, and batch operations. -""" - -from __future__ import annotations - -import asyncio -from pathlib import Path -from typing import Any, List, Optional, Union - -from vectorless._core import Engine, IndexContext, IndexOptions, QueryContext -from vectorless.config import EngineConfig, load_config_from_env -from vectorless.events import ( - EventEmitter, - IndexEventData, - IndexEventType, - QueryEventData, - QueryEventType, -) -from vectorless.streaming import StreamingQueryResult -from vectorless.types.graph import DocumentGraphWrapper -from vectorless.types.results import ( - IndexResultWrapper, - QueryResponse, -) - - -class Session: - """High-level Vectorless session. - - Configuration precedence: constructor args > env vars > config file > defaults. 
- - Usage:: - - from vectorless import Session - - session = Session(api_key="sk-...", model="gpt-4o") - result = await session.index(path="./report.pdf") - answer = await session.ask("What is the Q4 revenue?", doc_ids=[result.doc_id]) - print(answer.single().content) - - Or from environment variables:: - - # VECTORLESS_API_KEY, VECTORLESS_MODEL set in env - session = Session.from_env() - """ - - def __init__( - self, - api_key: Optional[str] = None, - model: Optional[str] = None, - endpoint: Optional[str] = None, - config: Optional[EngineConfig] = None, - config_file: Optional[Union[str, Path]] = None, - events: Optional[EventEmitter] = None, - ) -> None: - self._events = events or EventEmitter() - - # Resolve config: constructor > env > file > defaults - if config is not None: - self._config = config - else: - self._config = self._resolve_config(api_key, model, endpoint, config_file) - - # Build Rust engine - rust_config = self._config.to_rust_config() - self._engine = Engine( - api_key=self._config.llm.api_key, - model=self._config.llm.model or None, - endpoint=self._config.llm.endpoint or None, - config=rust_config, - ) - - @classmethod - def from_env(cls, events: Optional[EventEmitter] = None) -> "Session": - """Create a Session from environment variables only.""" - config = load_config_from_env() - return cls(config=config, events=events) - - @classmethod - def from_config_file( - cls, - path: Union[str, Path], - events: Optional[EventEmitter] = None, - ) -> "Session": - """Create a Session from a TOML config file.""" - from vectorless.config import load_config_from_file - - config = load_config_from_file(Path(path)) - return cls(config=config, events=events) - - def _resolve_config( - self, - api_key: Optional[str], - model: Optional[str], - endpoint: Optional[str], - config_file: Optional[Union[str, Path]], - ) -> EngineConfig: - from vectorless.config import load_config - - overrides: dict[str, Any] = {} - llm_overrides: dict[str, Any] = {} - if api_key is 
not None: - llm_overrides["api_key"] = api_key - if model is not None: - llm_overrides["model"] = model - if endpoint is not None: - llm_overrides["endpoint"] = endpoint - if llm_overrides: - overrides["llm"] = llm_overrides - - return load_config( - config_file=Path(config_file) if config_file else None, - overrides=overrides if overrides else None, - ) - - # ── Indexing ────────────────────────────────────────────── - - async def index( - self, - path: Optional[Union[str, Path]] = None, - paths: Optional[List[Union[str, Path]]] = None, - directory: Optional[Union[str, Path]] = None, - content: Optional[str] = None, - bytes_data: Optional[bytes] = None, - format: str = "markdown", - name: Optional[str] = None, - mode: str = "default", - force: bool = False, - ) -> IndexResultWrapper: - """Index a document from various sources. - - Exactly one source must be provided: path, paths, directory, - content, or bytes_data. - """ - sources_provided = sum( - x is not None for x in [path, paths, directory, content, bytes_data] - ) - if sources_provided != 1: - raise ValueError( - "Provide exactly one source: path, paths, directory, content, or bytes_data" - ) - - if force: - mode = "force" - - # Build IndexContext - if path is not None: - ctx = IndexContext.from_path(str(path)) - elif paths is not None: - ctx = IndexContext.from_paths([str(p) for p in paths]) - elif directory is not None: - ctx = IndexContext.from_dir(str(directory), recursive=True) - elif content is not None: - ctx = IndexContext.from_content(content, format) - elif bytes_data is not None: - ctx = IndexContext.from_bytes(list(bytes_data), format) - else: - raise ValueError("No source provided") - - if name is not None: - ctx = ctx.with_name(name) - if mode != "default": - ctx = ctx.with_mode(mode) - - # Emit start event - source_desc = str(path or paths or directory or "<content>" or "<bytes>") - self._events.emit_index( - IndexEventData(event_type=IndexEventType.STARTED, path=source_desc) - ) - - result = 
await self._engine.index(ctx) - - # Emit complete event - self._events.emit_index( - IndexEventData( - event_type=IndexEventType.COMPLETE, - doc_id=result.doc_id, - message=f"Indexed {result.doc_id or 'documents'}", - ) - ) - - return IndexResultWrapper.from_rust(result) - - async def index_batch( - self, - paths: List[Union[str, Path]], - *, - mode: str = "default", - jobs: int = 1, - force: bool = False, - progress: bool = True, - ) -> List[IndexResultWrapper]: - """Index multiple files with optional concurrency. - - Args: - paths: List of file paths to index. - mode: Indexing mode ("default", "force", "incremental"). - jobs: Max concurrent indexing jobs. - force: Force re-index existing documents. - progress: Emit progress events. - """ - semaphore = asyncio.Semaphore(jobs) - results: List[IndexResultWrapper] = [] - - async def _index_one(p: Union[str, Path]) -> IndexResultWrapper: - async with semaphore: - self._events.emit_index( - IndexEventData( - event_type=IndexEventType.STARTED, - path=str(p), - ) - ) - result = await self.index(path=p, mode=mode, force=force) - if progress: - self._events.emit_index( - IndexEventData( - event_type=IndexEventType.COMPLETE, - path=str(p), - doc_id=result.doc_id, - ) - ) - return result - - tasks = [_index_one(p) for p in paths] - results = await asyncio.gather(*tasks) - return list(results) - - # ── Querying ────────────────────────────────────────────── - - async def ask( - self, - question: str, - *, - doc_ids: Optional[List[str]] = None, - workspace_scope: bool = False, - timeout_secs: Optional[int] = None, - ) -> QueryResponse: - """Ask a question and get results with source attribution. - - Args: - question: Natural language query. - doc_ids: Limit query to specific document IDs. - workspace_scope: Query across all indexed documents. - timeout_secs: Per-operation timeout. 
- """ - # Emit start event - self._events.emit_query( - QueryEventData( - event_type=QueryEventType.STARTED, - query=question, - ) - ) - - ctx = QueryContext(question) - if doc_ids is not None: - ctx = ctx.with_doc_ids(doc_ids) - elif workspace_scope: - ctx = ctx.with_workspace() - if timeout_secs is not None: - ctx = ctx.with_timeout_secs(timeout_secs) - - result = await self._engine.query(ctx) - response = QueryResponse.from_rust(result) - - # Emit complete event - self._events.emit_query( - QueryEventData( - event_type=QueryEventType.COMPLETE, - query=question, - total_results=len(response.items), - ) - ) - - return response - - async def query_stream( - self, - question: str, - *, - doc_ids: Optional[List[str]] = None, - workspace_scope: bool = False, - timeout_secs: Optional[int] = None, - ) -> StreamingQueryResult: - """Stream query progress as an async iterator. - - Yields real-time events from the retrieval pipeline. - Terminal events are ``'completed'`` (with results) or ``'error'``. 
- - Usage:: - - stream = await session.query_stream("What is the revenue?") - async for event in stream: - print(event["type"], event) - result = stream.result - """ - ctx = QueryContext(question) - if doc_ids is not None: - ctx = ctx.with_doc_ids(doc_ids) - elif workspace_scope: - ctx = ctx.with_workspace() - if timeout_secs is not None: - ctx = ctx.with_timeout_secs(timeout_secs) - - raw_stream = await self._engine.query_stream(ctx) - return StreamingQueryResult(raw_stream) - - # ── Document Management ─────────────────────────────────── - - async def list_documents(self) -> list: - """List all indexed documents.""" - return await self._engine.list() - - async def remove_document(self, doc_id: str) -> bool: - """Remove a document by ID.""" - return await self._engine.remove(doc_id) - - async def document_exists(self, doc_id: str) -> bool: - """Check if a document exists.""" - return await self._engine.exists(doc_id) - - async def clear_all(self) -> int: - """Remove all indexed documents. 
Returns count removed.""" - return await self._engine.clear() - - # ── Graph ───────────────────────────────────────────────── - - async def get_graph(self) -> Optional[DocumentGraphWrapper]: - """Get the cross-document relationship graph.""" - graph = await self._engine.get_graph() - if graph is None: - return None - return DocumentGraphWrapper.from_rust(graph) - - # ── Metrics ─────────────────────────────────────────────── - - def metrics_report(self) -> Any: - """Get a comprehensive metrics report.""" - return self._engine.metrics_report() - - # ── Context Manager ─────────────────────────────────────── - - async def __aenter__(self) -> "Session": - return self - - async def __aexit__(self, *args: Any) -> None: - pass - - def __repr__(self) -> str: - model = self._config.llm.model or "unknown" - return f"Session(model={model!r})" diff --git a/vectorless/streaming.py b/vectorless/streaming.py index 9f01661c..edb2e409 100644 --- a/vectorless/streaming.py +++ b/vectorless/streaming.py @@ -1,14 +1,18 @@ -"""Streaming query results backed by real-time Rust streaming events. +"""Streaming query results with real-time progress events. -Wraps the PyO3 ``StreamingQuery`` async iterator and builds a -``QueryResponse`` from the terminal ``completed`` event. +Uses asyncio.Queue for producer-consumer pattern. +Events are emitted at each stage of the Python strategy pipeline: + understanding_done → workers_dispatched → worker_step → evaluation_done → synthesis_done → completed + +Terminal events are ``'completed'`` (with results) or ``'error'``. 
""" from __future__ import annotations +import asyncio from typing import Any, AsyncIterator, Dict, List, Optional -from vectorless.types.results import QueryResponse, QueryResult +from vectorless.ask.types import Output class StreamingQueryResult: @@ -16,17 +20,51 @@ class StreamingQueryResult: Usage:: - stream = await session.query_stream("What is the revenue?") + stream = await engine.query_stream("What is the revenue?") async for event in stream: print(event["type"], event) - result = stream.result # Available after iteration completes + result = stream.result """ - def __init__(self, raw_stream: Any) -> None: - self._stream = raw_stream # PyStreamingQuery from Rust - self._result: Optional[QueryResponse] = None + def __init__(self, queue: asyncio.Queue[Optional[Dict]]) -> None: + self._queue = queue + self._result: Optional[Output] = None self._consumed = False + @classmethod + def from_engine( + cls, + engine: Any, + question: str, + doc_ids: Optional[List[str]], + ) -> StreamingQueryResult: + """Create a StreamingQueryResult that runs the engine pipeline in background.""" + queue: asyncio.Queue[Optional[Dict]] = asyncio.Queue() + instance = cls(queue) + + async def _run() -> None: + try: + result = await engine._ask_python( + question, doc_ids, + event_queue=queue, + ) + instance._result = result + await queue.put({ + "type": "completed", + "total_results": len(result.evidence), + "confidence": result.confidence, + }) + except Exception as e: + await queue.put({ + "type": "error", + "message": str(e), + }) + # Sentinel: None signals end of stream + await queue.put(None) + + asyncio.ensure_future(_run()) + return instance + def __aiter__(self) -> AsyncIterator[Dict]: return self._iterate() @@ -35,41 +73,16 @@ async def _iterate(self) -> AsyncIterator[Dict]: return self._consumed = True - completed_event: Optional[Dict] = None - - async for event in self._stream: - event_type = event.get("type", "") - + while True: + event = await self._queue.get() + if event 
is None: + # Sentinel — producer is done + break yield event - - if event_type in ("completed", "error"): - if event_type == "completed": - completed_event = event - break # Terminal events end the stream - - if completed_event is not None: - self._result = self._build_response(completed_event) - - @staticmethod - def _build_response(event: Dict) -> QueryResponse: - """Build a QueryResponse from the completed event dict.""" - items: List[QueryResult] = [] - for r in event.get("results", []): - node_id = r.get("node_id") - items.append( - QueryResult( - doc_id=node_id or "", - content=r.get("content") or "", - score=r.get("score", 0.0), - confidence=event.get("confidence", 0.0), - node_ids=[node_id] if node_id else [], - evidence=[], - metrics=None, - ) - ) - return QueryResponse(items=items, failed=[]) + if event.get("type") in ("completed", "error"): + break @property - def result(self) -> Optional[QueryResponse]: + def result(self) -> Optional[Output]: """Final result, available after iteration completes.""" return self._result if self._consumed else None diff --git a/vectorless/sync_session.py b/vectorless/sync_session.py deleted file mode 100644 index bb772d1d..00000000 --- a/vectorless/sync_session.py +++ /dev/null @@ -1,196 +0,0 @@ -"""Synchronous Vectorless Session API. - -``SyncSession`` provides the same API as ``Session`` but with synchronous -methods — no ``async``/``await`` required. Works in scripts, Jupyter -notebooks, and any synchronous Python context. 
-""" - -from __future__ import annotations - -from pathlib import Path -from typing import Any, List, Optional, Union - -from vectorless._async_utils import run_async -from vectorless.config import EngineConfig, load_config_from_env, load_config_from_file -from vectorless.events import EventEmitter -from vectorless.session import Session -from vectorless.streaming import StreamingQueryResult -from vectorless.types.graph import DocumentGraphWrapper -from vectorless.types.results import IndexResultWrapper, QueryResponse - - -class SyncSession: - """Synchronous Vectorless session. - - Same API as ``Session`` but all methods are blocking (no async/await). - Works in Jupyter notebooks, scripts, and synchronous contexts. - - Usage:: - - from vectorless import SyncSession - - session = SyncSession(api_key="sk-...", model="gpt-4o") - result = session.index(path="./report.pdf") - answer = session.ask("What is the Q4 revenue?", doc_ids=[result.doc_id]) - print(answer.single().content) - """ - - def __init__( - self, - api_key: Optional[str] = None, - model: Optional[str] = None, - endpoint: Optional[str] = None, - config: Optional[EngineConfig] = None, - config_file: Optional[Union[str, Path]] = None, - events: Optional[EventEmitter] = None, - ) -> None: - self._session = Session( - api_key=api_key, - model=model, - endpoint=endpoint, - config=config, - config_file=config_file, - events=events, - ) - - @classmethod - def from_env(cls, events: Optional[EventEmitter] = None) -> "SyncSession": - """Create a SyncSession from environment variables.""" - config = load_config_from_env() - return cls(config=config, events=events) - - @classmethod - def from_config_file( - cls, - path: Union[str, Path], - events: Optional[EventEmitter] = None, - ) -> "SyncSession": - """Create a SyncSession from a TOML config file.""" - config = load_config_from_file(Path(path)) - return cls(config=config, events=events) - - # ── Indexing ────────────────────────────────────────────── - - def index( 
- self, - path: Optional[Union[str, Path]] = None, - paths: Optional[List[Union[str, Path]]] = None, - directory: Optional[Union[str, Path]] = None, - content: Optional[str] = None, - bytes_data: Optional[bytes] = None, - format: str = "markdown", - name: Optional[str] = None, - mode: str = "default", - force: bool = False, - ) -> IndexResultWrapper: - """Index a document (synchronous). - - Exactly one source must be provided: path, paths, directory, - content, or bytes_data. - """ - return run_async( - self._session.index( - path=path, - paths=paths, - directory=directory, - content=content, - bytes_data=bytes_data, - format=format, - name=name, - mode=mode, - force=force, - ) - ) - - def index_batch( - self, - paths: List[Union[str, Path]], - *, - mode: str = "default", - jobs: int = 1, - force: bool = False, - progress: bool = True, - ) -> List[IndexResultWrapper]: - """Index multiple files with optional concurrency (synchronous).""" - return run_async( - self._session.index_batch( - paths, - mode=mode, - jobs=jobs, - force=force, - progress=progress, - ) - ) - - # ── Querying ────────────────────────────────────────────── - - def ask( - self, - question: str, - *, - doc_ids: Optional[List[str]] = None, - workspace_scope: bool = False, - timeout_secs: Optional[int] = None, - ) -> QueryResponse: - """Ask a question and get results with source attribution (synchronous).""" - return run_async( - self._session.ask( - question, - doc_ids=doc_ids, - workspace_scope=workspace_scope, - timeout_secs=timeout_secs, - ) - ) - - def query_stream( - self, - question: str, - **kwargs: Any, - ) -> StreamingQueryResult: - """Start a streaming query (synchronous). - - Returns a ``StreamingQueryResult`` that is consumed as an async - iterator. For fully synchronous queries, use ``ask()`` instead. 
- """ - return run_async(self._session.query_stream(question, **kwargs)) - - # ── Document Management ─────────────────────────────────── - - def list_documents(self) -> list: - """List all indexed documents.""" - return run_async(self._session.list_documents()) - - def remove_document(self, doc_id: str) -> bool: - """Remove a document by ID.""" - return run_async(self._session.remove_document(doc_id)) - - def document_exists(self, doc_id: str) -> bool: - """Check if a document exists.""" - return run_async(self._session.document_exists(doc_id)) - - def clear_all(self) -> int: - """Remove all indexed documents. Returns count removed.""" - return run_async(self._session.clear_all()) - - # ── Graph ───────────────────────────────────────────────── - - def get_graph(self) -> Optional[DocumentGraphWrapper]: - """Get the cross-document relationship graph.""" - return run_async(self._session.get_graph()) - - # ── Metrics ─────────────────────────────────────────────── - - def metrics_report(self) -> Any: - """Get a comprehensive metrics report.""" - return self._session.metrics_report() - - # ── Context Manager ─────────────────────────────────────── - - def __enter__(self) -> "SyncSession": - return self - - def __exit__(self, *args: Any) -> None: - pass - - def __repr__(self) -> str: - return f"SyncSession({self._session!r})" diff --git a/vectorless/types/results.py b/vectorless/types/results.py index 5914d728..f725ddaf 100644 --- a/vectorless/types/results.py +++ b/vectorless/types/results.py @@ -217,6 +217,37 @@ class IndexResultWrapper: items: List[IndexItemWrapper] = field(default_factory=list) failed: List[FailedItem] = field(default_factory=list) + @classmethod + def from_doc_info(cls, doc_info: object) -> IndexResultWrapper: + """Create from a single Rust PyDocumentInfo (returned by ingest).""" + item = IndexItemWrapper( + doc_id=doc_info.doc_id, + name=doc_info.name, + format=doc_info.format, + description=getattr(doc_info, "description", None), + 
source_path=getattr(doc_info, "source_path", None), + page_count=getattr(doc_info, "page_count", None), + ) + return cls(doc_id=doc_info.doc_id, items=[item]) + + @classmethod + def from_doc_infos(cls, doc_infos: list) -> IndexResultWrapper: + """Create from a list of Rust PyDocumentInfo objects.""" + items = [] + first_doc_id = None + for info in doc_infos: + if first_doc_id is None: + first_doc_id = info.doc_id + items.append(IndexItemWrapper( + doc_id=info.doc_id, + name=info.name, + format=info.format, + description=getattr(info, "description", None), + source_path=getattr(info, "source_path", None), + page_count=getattr(info, "page_count", None), + )) + return cls(doc_id=first_doc_id, items=items) + @classmethod def from_rust(cls, result: object) -> IndexResultWrapper: items = [IndexItemWrapper.from_rust(i) for i in result.items]