diff --git a/README.md b/README.md
index 2ee4b516..9d2b6e88 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 <div align="center">
 
-<img src="https://raw.githubusercontent.com/vectorlessflow/vectorless/main/docs/design/logo-title.svg" alt="Vectorless" width="400" style="vertical-align:middle;">
+<img src="https://raw.githubusercontent.com/vectorlessflow/vectorless/main/docs/design/with-title.png" alt="Vectorless" width="400" style="vertical-align:middle;">
 
 <h1>Reasoning-native Document Intelligence Engine</h1>
 
@@ -43,7 +43,8 @@ from vectorless import Engine, IndexContext
 engine = Engine(workspace="./data")
 
 # Index a document (PDF, Markdown, DOCX, HTML)
-doc_id = engine.index(IndexContext.from_file("./report.pdf"))
+result = engine.index(IndexContext.from_file("./report.pdf"))
+doc_id = result.doc_id
 
 # Query
 result = engine.query(doc_id, "What is the total revenue?")
@@ -60,7 +61,7 @@ vectorless = "0.1"
 ```
 
 ```rust
-use vectorless::client::{Engine, EngineBuilder, IndexContext};
+use vectorless::client::{EngineBuilder, IndexContext, QueryContext};
 
 #[tokio::main]
 async fn main() -> vectorless::Result<()> {
@@ -70,10 +71,13 @@ async fn main() -> vectorless::Result<()> {
         .await?;
 
     // Index
-    let doc_id = engine.index(IndexContext::from_path("./report.pdf")).await?;
+    let result = engine.index(IndexContext::from_path("./report.pdf")).await?;
+    let doc_id = result.doc_id().unwrap();
 
     // Query
-    let result = engine.query(&doc_id, "What is the total revenue?").await?;
+    let result = engine.query(
+        QueryContext::new("What is the total revenue?").with_doc_id(doc_id)
+    ).await?;
     println!("Answer: {}", result.content);
 
     Ok(())
diff --git a/docs/design/with-title.png b/docs/design/with-title.png
new file mode 100644
index 00000000..9f8a6c34
Binary files /dev/null and b/docs/design/with-title.png differ
diff --git a/examples/rust/advanced.rs b/examples/rust/advanced.rs
index 946f619b..bc89d756 100644
--- a/examples/rust/advanced.rs
+++ b/examples/rust/advanced.rs
@@ -16,7 +16,7 @@
 //! cargo run --example advanced
 //! ```
 
-use vectorless::{Engine, IndexContext};
+use vectorless::{EngineBuilder, IndexContext, QueryContext};
 
 #[tokio::main]
 async fn main() -> vectorless::Result<()> {
@@ -24,7 +24,7 @@ async fn main() -> vectorless::Result<()> {
 
     // Method 1: Use explicit config file path
     // This loads all settings from the specified config file
-    let client = Engine::builder()
+    let client = EngineBuilder::new()
         .with_config_path("./config.toml") // or "./my_vectorless.toml"
         .build()
         .await
@@ -33,11 +33,14 @@ async fn main() -> vectorless::Result<()> {
     println!("✓ Client created with config file\n");
 
     // Index a document
-    let doc_id = client.index(IndexContext::from_path("./README.md")).await?;
+    let result = client.index(IndexContext::from_path("./README.md")).await?;
+    let doc_id = result.doc_id().unwrap().to_string();
     println!("✓ Indexed: {}\n", doc_id);
 
     // Query
-    let result = client.query(&doc_id, "What features does Vectorless provide?").await?;
+    let result = client
+        .query(QueryContext::new("What features does Vectorless provide?").with_doc_id(&doc_id))
+        .await?;
     println!("Query: What features does Vectorless provide?");
     println!("Score: {:.2}", result.score);
     if !result.content.is_empty() {
@@ -55,7 +58,7 @@ async fn main() -> vectorless::Result<()> {
     println!("  2. Auto-detected config file (vectorless.toml, config.toml, .vectorless.toml)");
     println!("  3. Explicit config file (with_config_path)");
     println!("  4. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)");
-    println!("  5. Builder methods (with_openai, with_model, etc.)");
+    println!("  5. Builder methods (with_key, with_model, with_endpoint)");
     println!();
     println!("Environment Variables:");
     println!("  OPENAI_API_KEY       - LLM API key");
diff --git a/examples/rust/basic.rs b/examples/rust/basic.rs
index 7064d889..5d5df2bd 100644
--- a/examples/rust/basic.rs
+++ b/examples/rust/basic.rs
@@ -3,44 +3,46 @@
 
 //! Basic usage example for Vectorless.
 //!
-//! This example demonstrates the core API in ~30 lines.
-//!
 //! # Usage
 //!
 //! ```bash
 //! cargo run --example basic
 //! ```
 
-use vectorless::{Engine, IndexContext};
+use vectorless::{EngineBuilder, IndexContext, QueryContext};
 
 #[tokio::main]
 async fn main() -> vectorless::Result<()> {
     println!("=== Vectorless Basic Example ===\n");
 
-    // 1. Create a client
-    let client = Engine::builder()
+    // 1. Create an engine
+    let engine = EngineBuilder::new()
         .with_workspace("./workspace")
         .build()
         .await
         .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?;
 
-    println!("✓ Client created\n");
+    println!("Engine created\n");
 
     // 2. Index a document
-    let doc_id = client.index(IndexContext::from_path("./README.md")).await?;
-    println!("✓ Indexed: {}\n", doc_id);
+    let result = engine.index(IndexContext::from_path("./README.md")).await?;
+    let doc_id = result.doc_id().unwrap().to_string();
+    println!("Indexed: {}\n", doc_id);
 
     // 3. List documents
     println!("Documents:");
-    for doc in client.list_documents().await? {
+    for doc in engine.list().await? {
         println!("  - {} ({})", doc.name, doc.id);
     }
     println!();
 
     // 4. Query
-    match client.query(&doc_id, "What is vectorless?").await {
+    match engine
+        .query(QueryContext::new("What is vectorless?").with_doc_id(&doc_id))
+        .await
+    {
         Ok(result) => {
-            println!("Query score: {:.2}", result.score);
+            println!("Score: {:.2}", result.score);
             if !result.content.is_empty() {
                 let preview: String = result.content.chars().take(150).collect();
                 println!("Result: {}...", preview);
@@ -50,14 +52,9 @@ async fn main() -> vectorless::Result<()> {
     }
     println!();
 
-    // 5. Clone for concurrent use (client is Clone + Send + Sync)
-    let _client1 = client.clone();
-    let _client2 = client.clone();
-    println!("✓ Client cloned for concurrent use\n");
-
-    // 6. Cleanup
-    client.remove(&doc_id).await?;
-    println!("✓ Removed: {}", doc_id);
+    // 5. Cleanup
+    engine.remove(&doc_id).await?;
+    println!("Removed: {}", doc_id);
 
     println!("\n=== Done ===");
     Ok(())
diff --git a/examples/rust/batch_processing.rs b/examples/rust/batch_processing.rs
deleted file mode 100644
index 1e0d11ee..00000000
--- a/examples/rust/batch_processing.rs
+++ /dev/null
@@ -1,1156 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Batch document processing example.
-//!
-//! This example demonstrates how to efficiently process
-//! multiple documents in batch mode using sessions.
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example batch_processing
-//! ```
-
-use vectorless::client::{EngineBuilder, IndexContext};
-
-#[tokio::main]
-async fn main() -> Result<(), Box<dyn std::error::Error>> {
-    println!("=== Batch Document Processing Example ===\n");
-
-    // 1. Create engine and session
-    println!("Step 1: Setting up...");
-    let engine = EngineBuilder::new()
-        .with_workspace("./workspace_batch_example")
-        .build()
-        .await
-        .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?;
-
-    let session = engine.session().await;
-    println!("  ✓ Session created: {}\n", session.id());
-
-    // 2. Create sample documents
-    println!("Step 2: Creating sample documents...");
-    let temp_dir = tempfile::tempdir()?;
-
-    let documents = vec![
-        (
-            "intro.md",
-            r#"# Introduction
-
-Welcome to the vectorless library. This is a document intelligence engine.
-
-## Features
-
-- Tree-based navigation
-- Multi-format support
-- Session management
-"#,
-        ),
-        (
-            "api.md",
-            r#"# API Reference
-
-## Engine
-
-The main client for document operations.
-
-### Methods
-
-- `index(path)`: Index a document
-- `query(question)`: Query indexed content
-
-## Session
-
-Multi-document operations with caching.
-
-### Methods
-
-- `index(path)`: Index into session
-- `query_all(question)`: Query across all documents
-"#,
-        ),
-        (
-            "guide.md",
-            r#"# User Guide
-
-## Getting Started
-
-First, create a client with workspace configuration.
-
-## Best Practices
-
-- Use sessions for multi-document operations
-- Enable caching for better performance
-- Monitor events for debugging
-"#,
-        ),
-        (
-            "advanced.md",
-            r#"# Advanced Topics
-
-## Performance Tuning
-
-Configure retrieval parameters for optimal performance.
-
-### Parameters
-
-- `top_k`: Number of results
-- `max_tokens`: Token budget
-
-## Custom Pilots
-
-Implement custom navigation logic.
-"#,
-        ),
-        (
-            "reference.md",
-            r#"# Reference
-
-## Configuration
-
-All configuration is done via TOML files.
-
-### Example
-
-```toml
-[retrieval]
-top_k = 5
-max_tokens = 4000
-```
-"#,
-        ),
-        (
-            "examples.md",
-            r#"# Examples
-
-## Basic Usage
-
-Simple indexing and querying example.
-
-## Batch Processing
-
-Process multiple documents concurrently.
-
-## Session Usage
-
-Multi-document operations with caching.
-"#,
-        ),
-        (
-            "faq.md",
-            r#"# FAQ
-
-## Common Questions
-
-**Q: How do I index a document?**
-A: Use `engine.index(path)` method.
-
-**Q: How to query?**
-A: Use `engine.query(doc_id, question)` method.
-
-**Q: What formats are supported?**
-A: Markdown, PDF, DOCX, HTML.
-"#,
-        ),
-        (
-            "changelog.md",
-            r#"# Changelog
-
-## Version 0.1.0
-
-- Initial release
-- Basic indexing support
-- Simple retrieval
-
-## Version 0.2.0
-
-- Session support
-- Event system
-- Content aggregator
-"#,
-        ),
-        (
-            "contributing.md",
-            r#"# Contributing
-
-## How to Contribute
-
-We welcome contributions! Please follow these steps:
-
-1. Fork the repository
-2. Create a feature branch
-3. Submit a pull request
-
-## Code Style
-
-- Run `cargo fmt`
-- Run `cargo clippy`
-- Add tests
-"#,
-        ),
-        (
-            "license.md",
-            r#"# License
-
-Apache License, Version 2.0
-
-Copyright 2026 vectorless developers
-"#,
-        ),
-        (
-            "architecture.md",
-            r#"# Architecture
-
-## Overview
-
-Vectorless uses a tree-based architecture.
-
-## Components
-
-- Parser: Document parsing
-- Indexer: Tree building
-- Retriever: Content search
-- Storage: Persistence
-"#,
-        ),
-        (
-            "security.md",
-            r#"# Security
-
-## Security Considerations
-
-- API keys are stored securely
-- No sensitive data in logs
-- Input validation
-
-## Best Practices
-
-- Use environment variables
-- Rotate keys periodically
-"#,
-        ),
-        (
-            "performance.md",
-            r#"# Performance
-
-## Optimization Tips
-
-- Use caching effectively
-- Configure appropriate batch sizes
-- Monitor memory usage
-
-## Benchmarks
-
-Run `cargo bench` for performance metrics.
-"#,
-        ),
-        (
-            "testing.md",
-            r#"# Testing
-
-## Running Tests
-
-```bash
-cargo test
-```
-
-## Test Coverage
-
-- Unit tests
-- Integration tests
-- Example tests
-"#,
-        ),
-        (
-            "deployment.md",
-            r#"# Deployment
-
-## Production Setup
-
-- Configure workspace directory
-- Set up logging
-- Monitor performance
-
-## Configuration
-
-Use TOML configuration files.
-"#,
-        ),
-        (
-            "troubleshooting.md",
-            r#"# Troubleshooting
-
-## Common Issues
-
-### Indexing Fails
-
-Check file format and permissions.
-
-### Query Returns Empty
-
-Ensure document is indexed.
-
-### Performance Issues
-
-Reduce batch size or enable caching.
-"#,
-        ),
-        (
-            "integrations.md",
-            r#"# Integrations
-
-## LLM Providers
-
-- OpenAI
-- Anthropic
-- Local models
-
-## Storage Backends
-
-- File system (default)
-- S3 (planned)
-"#,
-        ),
-        (
-            "migrations.md",
-            r#"# Migrations
-
-## Version Migrations
-
-### 0.1.x to 0.2.x
-
-- Update configuration format
-- Re-index documents
-"#,
-        ),
-        (
-            "roadmap.md",
-            r#"# Roadmap
-
-## Future Plans
-
-### Short Term
-
-- Streaming support
-- More formats
-
-### Long Term
-
-- Distributed indexing
-- Real-time updates
-"#,
-        ),
-        (
-            "credits.md",
-            r#"# Credits
-
-## Contributors
-
-Thanks to all contributors!
-
-## Libraries
-
-Built with Rust and many open-source libraries.
-"#,
-        ),
-        (
-            "index.md",
-            r#"# Index
-
-## Quick Links
-
-- [Introduction](intro.md)
-- [API Reference](api.md)
-- [User Guide](guide.md)
-
-## Search
-
-Use the search functionality to find specific content.
-"#,
-        ),
-        (
-            "search.md",
-            r#"# Search
-
-## Search Functionality
-
-### Basic Search
-
-```rust
-let results = engine.query(&doc_id, "search term").await?;
-```
-
-### Advanced Search
-
-Use sessions for cross-document search.
-"#,
-        ),
-        (
-            "export.md",
-            r#"# Export
-
-## Exporting Data
-
-### JSON Export
-
-```rust
-let json = tree.to_structure_json();
-```
-
-### Custom Formats
-
-Implement custom exporters as needed.
-"#,
-        ),
-        (
-            "import.md",
-            r#"# Import
-
-## Importing Data
-
-### From Files
-
-```rust
-let doc_id = engine.index("./document.md").await?;
-```
-
-### From Memory
-
-Use the content directly with parsers.
-"#,
-        ),
-        (
-            "validation.md",
-            r#"# Validation
-
-## Input Validation
-
-### Document Paths
-
-Must exist and be readable.
-
-### Configuration
-
-Validated on load with helpful errors.
-
-### Queries
-
-Sanitized before processing.
-"#,
-        ),
-        (
-            "formatting.md",
-            r#"# Formatting
-
-## Content Formatting
-
-### Markdown
-
-Standard CommonMark with extensions.
-
-### Code Blocks
-
-Syntax highlighting support.
-
-### Tables
-
-Basic table parsing.
-"#,
-        ),
-        (
-            "localization.md",
-            r#"# Localization
-
-## Internationalization
-
-Currently English-only.
-
-## Future Support
-
-Planned i18n support for:
-- Error messages
-- UI strings
-- Documentation
-"#,
-        ),
-        (
-            "accessibility.md",
-            r#"# Accessibility
-
-## Accessibility
-
-### Documentation
-
-Clear and comprehensive docs.
-
-### API Design
-
-Consistent and intuitive naming.
-
-### Error Messages
-
-Helpful and actionable.
-"#,
-        ),
-        (
-            "glossary.md",
-            r#"# Glossary
-
-## Terms
-
-- **Document Tree**: Hierarchical structure
-- **Session**: Multi-document context
-- **Workspace**: Document storage
-- **Retrieval**: Content search
-"#,
-        ),
-        (
-            "appendix.md",
-            r#"# Appendix
-
-## Additional Resources
-
-- [GitHub Repository](https://github.com)
-- [Documentation Site](https://docs.vectorless.dev)
-- [Community Discord](https://discord.gg)
-"#,
-        ),
-        (
-            "summary.md",
-            r#"# Summary
-
-## Overview
-
-This documentation covers all aspects of vectorless.
-
-## Next Steps
-
-- Try the examples
-- Join the community
-- Contribute!
-"#,
-        ),
-        (
-            "conclusion.md",
-            r#"# Conclusion
-
-## Thank You
-
-Thanks for using vectorless!
-
-## Feedback
-
-We'd love to hear from you. Open an issue on GitHub.
-"#,
-        ),
-        (
-            "revision.md",
-            r#"# Revision History
-
-## Document Versions
-
-| Version | Date       | Changes                    |
-|---------|------------|---------------------------|
-| 1.0     | 2026-01-01 | Initial version           |
-| 1.1     | 2026-02-01 | Session support           |
-"#,
-        ),
-        (
-            "feedback.md",
-            r#"# Feedback
-
-## Providing Feedback
-
-We value your input!
-
-### Channels
-
-- GitHub Issues
-- Discord Community
-- Email Support
-
-### What to Share
-
-- Bug reports
-- Feature requests
-- Documentation improvements
-"#,
-        ),
-        (
-            "support.md",
-            r#"# Support
-
-## Getting Help
-
-### Documentation
-
-Start with the user guide.
-
-### Community
-
-Join our Discord for discussions.
-
-### Enterprise
-
-Contact us for enterprise support.
-"#,
-        ),
-        (
-            "updates.md",
-            r#"# Updates
-
-## Staying Updated
-
-### Version Updates
-
-Check the changelog for updates.
-
-### Security Updates
-
-Apply security patches promptly.
-
-### Deprecations
-
-Watch for deprecation notices.
-"#,
-        ),
-        (
-            "resources.md",
-            r#"# Resources
-
-## External Resources
-
-### Official
-
-- Documentation: docs.vectorless.dev
-- GitHub: github.com/vectorless
-- Discord: discord.gg/vectorless
-
-### Community
-
-- Blog posts
-- Tutorial videos
-- Example projects
-"#,
-        ),
-        (
-            "contact.md",
-            r#"# Contact
-
-## Contact Information
-
-### General Inquiries
-
-Email: hello@vectorless.dev
-
-### Security Issues
-
-Email: security@vectorless.dev
-
-### Enterprise Sales
-
-Email: enterprise@vectorless.dev
-"#,
-        ),
-        (
-            "privacy.md",
-            r#"# Privacy Policy
-
-## Data Handling
-
-Vectorless processes documents locally.
-
-## No Tracking
-
-We don't track usage or content.
-
-## API Keys
-
-Stored securely in configuration files.
-"#,
-        ),
-        (
-            "terms.md",
-            r#"# Terms of Service
-
-## Usage Terms
-
-By using vectorless, you agree to:
-
-- Use responsibly
-- Follow applicable laws
-- Respect rate limits
-
-## Changes
-
-Terms may be updated. Check for revisions.
-"#,
-        ),
-        (
-            "legal.md",
-            r#"# Legal
-
-## Licensing
-
-Apache License 2.0
-
-## Copyright
-
-Copyright 2026 vectorless developers
-
-## Trademarks
-
-Vectorless is a trademark.
-"#,
-        ),
-        (
-            "versioning.md",
-            r#"# Versioning
-
-## Semantic Versioning
-
-We follow semver:
-
-- MAJOR: Breaking changes
-- MINOR: New features
-- PATCH: Bug fixes
-
-## Current Version
-
-0.1.10
-"#,
-        ),
-        (
-            "compatibility.md",
-            r#"# Compatibility
-
-## Supported Versions
-
-- Rust 1.70+
-- Tokio 1.x
-
-## Platform Support
-
-- Linux
-- macOS
-- Windows
-
-## Breaking Changes
-
-Documented in changelog.
-"#,
-        ),
-        (
-            "installation.md",
-            r#"# Installation
-
-## Requirements
-
-- Rust 1.70+
-- Tokio runtime
-
-## Install
-
-```bash
-cargo install vectorless
-```
-
-## Verify
-
-```bash
-vectorless --version
-```
-"#,
-        ),
-        (
-            "quickstart.md",
-            r#"# Quick Start
-
-## 5-Minute Setup
-
-1. Install vectorless
-2. Create a client
-3. Index a document
-4. Query!
-
-```rust
-let client = Engine::builder().build()?;
-let doc_id = client.index("./doc.md").await?;
-let result = client.query(&doc_id, "What is this?").await?;
-```
-"#,
-        ),
-        (
-            "tutorial.md",
-            r#"# Tutorial
-
-## Introduction
-
-This tutorial covers basic usage.
-
-## Step 1: Setup
-
-Create a client with workspace.
-
-## Step 2: Index
-
-Index your first document.
-
-## Step 3: Query
-
-Ask questions about your document.
-
-## Step 4: Next
-
-Explore advanced features.
-"#,
-        ),
-        (
-            "examples_overview.md",
-            r#"# Examples Overview
-
-## Available Examples
-
-| Example         | Description                    |
-|-----------------|--------------------------------|
-| basic.rs        | Basic usage                   |
-| session.rs      | Multi-document operations     |
-| events.rs       | Event callbacks              |
-| batch.rs        | Batch processing             |
-
-## Running Examples
-
-```bash
-cargo run --example <name>
-```
-"#,
-        ),
-        (
-            "configuration.md",
-            r#"# Configuration
-
-## Configuration File
-
-Use `config.toml` for settings:
-
-```toml
-[storage]
-workspace_dir = "./workspace"
-
-[retrieval]
-top_k = 5
-max_tokens = 4000
-```
-
-## Environment Variables
-
-- `OPENAI_API_KEY`: LLM API key
-"#,
-        ),
-        (
-            "optimization.md",
-            r#"# Optimization
-
-## Performance Tips
-
-- Use sessions for caching
-- Batch document indexing
-- Configure appropriate token limits
-
-## Memory Management
-
-Documents are cached in sessions.
-
-## Concurrency
-
-Use `buffer_unordered` for parallel indexing.
-"#,
-        ),
-        (
-            "errors.md",
-            r#"# Error Handling
-
-## Error Types
-
-- `ConfigError`: Configuration issues
-- `ParseError`: Document parsing failures
-- `RetrievalError`: Query failures
-
-## Handling Errors
-
-```rust
-match result {
-    Ok(response) => { /* success */ },
-    Err(Error::Parse(msg)) => { /* handle parse error */ },
-    Err(e) => { /* other error */ },
-}
-```
-"#,
-        ),
-        (
-            "logging.md",
-            r#"# Logging
-
-## Log Levels
-
-- ERROR: Serious issues
-- WARN: Potential issues
-- INFO: General information
-- DEBUG: Detailed information
-- TRACE: Very detailed
-
-## Enabling Logs
-
-```bash
-RUST_LOG=debug cargo run
-```
-"#,
-        ),
-        (
-            "metrics.md",
-            r#"# Metrics
-
-## Available Metrics
-
-- Query count
-- Cache hit rate
-- Average query time
-
-## Accessing Metrics
-
-```rust
-let stats = session.stats();
-println!("Cache hit rate: {:.1}%", stats.cache_hit_rate() * 100.0);
-```
-"#,
-        ),
-        (
-            "health.md",
-            r#"# Health Checks
-
-## Workspace Health
-
-Check workspace integrity:
-
-```rust
-let docs = engine.list_documents();
-println!("{} documents indexed", docs.len());
-```
-
-## Session Health
-
-Monitor session statistics regularly.
-"#,
-        ),
-        (
-            "backup.md",
-            r#"# Backup
-
-## Backing Up
-
-Copy the workspace directory:
-
-```bash
-cp -r ./workspace ./workspace_backup
-```
-
-## Restoration
-
-Restore by copying back:
-
-```bash
-cp -r ./workspace_backup ./workspace
-```
-"#,
-        ),
-        (
-            "recovery.md",
-            r#"# Recovery
-
-## Corrupted Documents
-
-Remove and re-index:
-
-```rust
-engine.remove(&doc_id)?;
-engine.index(&path).await?;
-```
-
-## Session Recovery
-
-Create a new session if issues occur.
-"#,
-        ),
-        (
-            "monitoring.md",
-            r#"# Monitoring
-
-## Production Monitoring
-
-Use events for real-time monitoring:
-
-```rust
-let events = EventEmitter::new()
-    .on_query(|e| {
-        // Log to monitoring system
-    });
-```
-
-## Alerts
-
-Set up alerts for error rates.
-"#,
-        ),
-        (
-            "scaling.md",
-            r#"# Scaling
-
-## Horizontal Scaling
-
-Run multiple instances with shared storage.
-
-## Vertical Scaling
-
-Increase resources for single instance.
-
-## Considerations
-
-- Storage backend
-- Cache coordination
-- Rate limiting
-"#,
-        ),
-        (
-            "security_config.md",
-            r#"# Security Configuration
-
-## API Keys
-
-Store securely:
-
-```toml
-[summary]
-api_key = "${OPENAI_API_KEY}"
-```
-
-## Network Security
-
-Use HTTPS for all API calls.
-
-## Access Control
-
-Implement authentication for production.
-"#,
-        ),
-    ];
-
-    for (name, content) in &documents {
-        let path = temp_dir.path().join(name);
-        std::fs::write(&path, content)?;
-    }
-
-    println!("  ✓ Created {} sample documents\n", documents.len());
-
-    // 3. Batch indexing with progress
-    println!("Step 3: Batch indexing...");
-    let start = std::time::Instant::now();
-    let mut doc_ids = Vec::new();
-
-    for (name, _) in &documents {
-        let path = temp_dir.path().join(name);
-        match session.index(IndexContext::from_path(&path)).await {
-            Ok(doc_id) => {
-                doc_ids.push(doc_id);
-            }
-            Err(e) => {
-                eprintln!("  ✗ Failed to index {}: {}", name, e);
-            }
-        }
-    }
-
-    let elapsed = start.elapsed();
-    println!("  ✓ Indexed {} documents in {:?}", doc_ids.len(), elapsed);
-    println!(
-        "  - Rate: {:.1} docs/sec",
-        doc_ids.len() as f64 / elapsed.as_secs_f64()
-    );
-    println!();
-
-    // 4. Show session stats
-    println!("Step 4: Session statistics:");
-    let stats = session.stats();
-    println!(
-        "  - Documents in session: {}",
-        session.list_documents().len()
-    );
-    println!("  - Queries: {}", stats.query_count.get());
-    println!();
-
-    // 5. Batch query with progress
-    println!("Step 5: Batch querying...");
-    let queries = vec![
-        "What is vectorless?",
-        "How to index?",
-        "Configuration options",
-        "API methods",
-        "Performance tips",
-        "Error handling",
-        "Logging setup",
-        "Security considerations",
-        "Scaling options",
-        "Getting help",
-    ];
-
-    let start = std::time::Instant::now();
-    let mut success_count = 0;
-
-    for query in &queries {
-        match session.query_all(query).await {
-            Ok(results) => {
-                if !results.is_empty() {
-                    success_count += 1;
-                }
-            }
-            Err(e) => {
-                eprintln!("  ✗ Query failed: {}", e);
-            }
-        }
-    }
-
-    let elapsed = start.elapsed();
-    println!("  ✓ Completed {} queries in {:?}", queries.len(), elapsed);
-    println!(
-        "  - Success rate: {:.0}%",
-        (success_count as f64 / queries.len() as f64) * 100.0
-    );
-    println!(
-        "  - Rate: {:.1} queries/sec",
-        queries.len() as f64 / elapsed.as_secs_f64()
-    );
-    println!();
-
-    // 6. Final statistics
-    println!("Step 6: Final statistics:");
-    let stats = session.stats();
-    println!("  - Total documents: {}", session.list_documents().len());
-    println!("  - Total queries: {}", stats.query_count.get());
-    println!("  - Cache hits: {}", stats.cache_hits.get());
-    println!("  - Cache misses: {}", stats.cache_misses.get());
-    println!("  - Cache hit rate: {:.1}%", stats.cache_hit_rate() * 100.0);
-    if let Some(avg_time) = stats.avg_query_time() {
-        println!("  - Avg query time: {:?}", avg_time);
-    }
-    println!("  - Session age: {:?}", session.age());
-    println!();
-
-    // 7. Cleanup
-    println!("Step 7: Cleanup...");
-    for doc_id in &doc_ids {
-        engine.remove(doc_id).await?;
-    }
-    println!("  ✓ Removed {} documents\n", doc_ids.len());
-
-    println!("=== Example Complete ===");
-    Ok(())
-}
diff --git a/examples/rust/content_aggregation.rs b/examples/rust/content_aggregation.rs
deleted file mode 100644
index 8437ccd0..00000000
--- a/examples/rust/content_aggregation.rs
+++ /dev/null
@@ -1,196 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Content Aggregation Accuracy Example
-//!
-//! This example demonstrates the content aggregation module's ability to:
-//! 1. Score content relevance
-//! 2. Allocate token budget
-//! 3. Build structured output
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example content_aggregation
-//! ```
-
-use indextree::Arena;
-use vectorless::document::NodeId;
-use vectorless::retrieval::content::{
-    AllocationStrategy, BudgetAllocator, ContentAggregator, ContentAggregatorConfig, ContentChunk,
-    OutputFormat, RelevanceScorer, ScoringContext, ScoringStrategyConfig, StructureBuilder,
-};
-
-fn make_node_id() -> NodeId {
-    let mut arena = Arena::new();
-    let node = vectorless::document::TreeNode {
-        title: "Test".to_string(),
-        structure: String::new(),
-        content: String::new(),
-        summary: String::new(),
-        depth: 0,
-        start_index: 0,
-        end_index: 0,
-        start_page: None,
-        end_page: None,
-        node_id: None,
-        physical_index: None,
-        token_count: None,
-        references: Vec::new(),
-    };
-    NodeId(arena.new_node(node))
-}
-
-fn main() {
-    println!("=== Content Aggregation Accuracy Demo ===\n");
-
-    // 1. Demonstrate Relevance Scoring
-    println!("1. Relevance Scoring Demo");
-    println!("---------------------------");
-
-    let query = "What is the architecture of vectorless?";
-    let scorer = RelevanceScorer::new(query, ScoringStrategyConfig::KeywordWithBM25);
-
-    let chunks = vec![
-        ContentChunk::new(
-            make_node_id(),
-            "Architecture Overview".to_string(),
-            "Vectorless uses a tree-based architecture for document navigation. The system consists of multiple stages: parsing, indexing, and retrieval.".to_string(),
-            0,
-        ),
-        ContentChunk::new(
-            make_node_id(),
-            "Installation Guide".to_string(),
-            "To install vectorless, add it to your Cargo.toml file. Then run cargo build to compile.".to_string(),
-            1,
-        ),
-        ContentChunk::new(
-            make_node_id(),
-            "Core Components".to_string(),
-            "The architecture includes Pilot for navigation, Judge for sufficiency checking, and multiple search algorithms like beam search and greedy search.".to_string(),
-            1,
-        ),
-    ];
-
-    let ctx = ScoringContext::default();
-
-    println!("Query: \"{}\"", query);
-    println!("\nScored chunks:");
-    for chunk in &chunks {
-        let relevance = scorer.score_chunk(chunk, &ctx);
-        println!(
-            "  - '{}' (depth {}): score {:.3}",
-            chunk.title, chunk.depth, relevance.score
-        );
-        println!(
-            "    Components: keyword={:.2}, bm25={:.2}, depth_penalty={:.2}, density={:.2}",
-            relevance.components.keyword_score,
-            relevance.components.bm25_score,
-            relevance.components.depth_penalty,
-            relevance.components.density_score,
-        );
-    }
-
-    // 2. Demonstrate Budget Allocation
-    println!("\n\n2. Budget Allocation Demo");
-    println!("---------------------------");
-
-    let scored: Vec<_> = chunks
-        .iter()
-        .map(|chunk| scorer.score_chunk(chunk, &ctx))
-        .collect();
-
-    let strategies = vec![
-        ("Greedy", AllocationStrategy::Greedy),
-        (
-            "Hierarchical (20%/level)",
-            AllocationStrategy::Hierarchical { min_per_level: 0.2 },
-        ),
-    ];
-
-    for (name, strategy) in strategies {
-        let allocator = BudgetAllocator::new(200).with_strategy(strategy);
-
-        let result = allocator.allocate(scored.clone(), 2);
-
-        println!("\n{} Strategy:", name);
-        println!("  Tokens used: {}/{}", result.tokens_used, 200);
-        println!("  Items selected: {}", result.selected.len());
-        println!("  Avg score: {:.3}", result.stats.avg_score);
-
-        for content in &result.selected {
-            let trunc = if content.is_truncated() {
-                " [truncated]"
-            } else {
-                ""
-            };
-            println!(
-                "    - '{}' ({} tokens, score {:.2}){}",
-                content.title, content.tokens, content.score, trunc
-            );
-        }
-    }
-
-    // 3. Demonstrate Structure Building
-    println!("\n\n3. Structure Building Demo");
-    println!("---------------------------");
-
-    let formats = vec![
-        ("Markdown", OutputFormat::Markdown),
-        ("Flat", OutputFormat::Flat),
-    ];
-
-    let allocator = BudgetAllocator::new(500).with_strategy(AllocationStrategy::Greedy);
-    let result = allocator.allocate(scored.clone(), 2);
-
-    for (name, format) in formats {
-        let builder = StructureBuilder::new(format);
-        let tree = vectorless::document::DocumentTree::new("Test", "");
-        let structured = builder.build(result.selected.clone(), &tree);
-
-        println!(
-            "\n{} Output ({} chars, {} tokens):",
-            name,
-            structured.content.len(),
-            structured.metadata.total_tokens
-        );
-        let preview = if structured.content.len() > 300 {
-            format!("{}...", &structured.content[..300])
-        } else {
-            structured.content.clone()
-        };
-        println!("{}", preview.lines().take(8).collect::<Vec<_>>().join("\n"));
-    }
-
-    // 4. Demonstrate Full Aggregation Pipeline
-    println!("\n\n4. Full Aggregation Pipeline Demo");
-    println!("-----------------------------------");
-
-    let configs = vec![
-        ("Default (4000 tokens)", ContentAggregatorConfig::default()),
-        (
-            "Conservative (1000 tokens)",
-            ContentAggregatorConfig::new()
-                .with_token_budget(1000)
-                .with_min_relevance(0.3),
-        ),
-        (
-            "High Precision (2000 tokens, 0.5 threshold)",
-            ContentAggregatorConfig::new()
-                .with_token_budget(2000)
-                .with_min_relevance(0.5),
-        ),
-    ];
-
-    for (name, config) in configs {
-        println!("\n{} Config:", name);
-        println!("  Token budget: {}", config.token_budget);
-        println!("  Min relevance: {:.1}", config.min_relevance_score);
-
-        let aggregator = ContentAggregator::new(config);
-        // Note: Full aggregation requires a DocumentTree with actual content
-        let _ = aggregator; // Suppress unused warning
-    }
-
-    println!("\n=== Demo Complete ===");
-}
diff --git a/examples/rust/custom_config.rs b/examples/rust/custom_config.rs
index 12eaedc4..b916143b 100644
--- a/examples/rust/custom_config.rs
+++ b/examples/rust/custom_config.rs
@@ -12,7 +12,7 @@
 //! cargo run --example custom_config
 //! ```
 
-use vectorless::{Engine, IndexContext};
+use vectorless::{EngineBuilder, IndexContext, QueryContext};
 
 #[tokio::main]
 async fn main() -> vectorless::Result<()> {
@@ -32,9 +32,10 @@ async fn main() -> vectorless::Result<()> {
     // ============================================================
 
     // Example: Use DeepSeek API
-    let client = Engine::builder()
+    let client = EngineBuilder::new()
         .with_workspace("./workspace")
-        .with_model("deepseek-chat", Some("sk-your-deepseek-key".to_string()))
+        .with_model("deepseek-chat")
+        .with_key("sk-your-deepseek-key")
         .with_endpoint("https://api.deepseek.com/v1")
         .build()
         .await
@@ -43,11 +44,14 @@ async fn main() -> vectorless::Result<()> {
     println!("✓ Client created with custom settings\n");
 
     // Index a document
-    let doc_id = client.index(IndexContext::from_path("./README.md")).await?;
+    let index_result = client.index(IndexContext::from_path("./README.md")).await?;
+    let doc_id = index_result.doc_id().expect("index result should contain a doc_id").to_string();
     println!("✓ Indexed: {}\n", doc_id);
 
     // Query
-    let result = client.query(&doc_id, "What is Vectorless?").await?;
+    let result = client
+        .query(QueryContext::new("What is Vectorless?").with_doc_id(&doc_id))
+        .await?;
     println!("Query: What is Vectorless?");
     println!("Score: {:.2}", result.score);
     if !result.content.is_empty() {
@@ -64,25 +68,27 @@ async fn main() -> vectorless::Result<()> {
     // ============================================================
 
     // Azure OpenAI:
-    // let client = Engine::builder()
+    // let client = EngineBuilder::new()
     //     .with_workspace("./workspace")
-    //     .with_model("gpt-4o", Some("your-azure-key".to_string()))
+    //     .with_model("gpt-4o")
+    //     .with_key("your-azure-key")
     //     .with_endpoint("https://your-resource.openai.azure.com/openai/deployments/your-deployment")
     //     .build()
     //     .await?;
 
     // Local LLM (e.g., Ollama with OpenAI-compatible API):
-    // let client = Engine::builder()
+    // let client = EngineBuilder::new()
     //     .with_workspace("./workspace")
-    //     .with_model("llama3", None)  // No API key needed
+    //     .with_model("llama3")
     //     .with_endpoint("http://localhost:11434/v1")
     //     .build()
     //     .await?;
 
     // Anthropic Claude (via OpenAI-compatible proxy):
-    // let client = Engine::builder()
+    // let client = EngineBuilder::new()
     //     .with_workspace("./workspace")
-    //     .with_model("claude-3-5-sonnet-20241022", Some("sk-ant-...".to_string()))
+    //     .with_model("claude-3-5-sonnet-20241022")
+    //     .with_key("sk-ant-...")
     //     .with_endpoint("https://api.anthropic.com/v1")
     //     .build()
     //     .await?;
diff --git a/examples/rust/custom_pilot.rs b/examples/rust/custom_pilot.rs
deleted file mode 100644
index 15f4542e..00000000
--- a/examples/rust/custom_pilot.rs
+++ /dev/null
@@ -1,286 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Custom Pilot implementation example.
-//!
-//! This example demonstrates how to implement a custom Pilot
-//! that provides navigation guidance during retrieval.
-//!
-//! # What you'll learn:
-//! - How to implement the Pilot trait
-//! - When to intervene (START, FORK, BACKTRACK, EVALUATE)
-//! - How to provide ranked candidates
-//! - How to integrate custom Pilot with the retrieval pipeline
-//!
-//! # Key concepts:
-//!
-//! ## Intervention Points
-//! - START: Before search begins - analyze query, set direction
-//! - FORK: At branch points - rank candidates, guide path selection
-//! - BACKTRACK: When search fails - suggest alternatives
-//! - EVALUATE: After content found - check sufficiency
-//!
-//! ## Score Merging
-//! ```text
-//! final_score = alpha * algorithm_score + beta * llm_score
-//! ```
-
-use async_trait::async_trait;
-use std::collections::HashSet;
-use vectorless::document::{DocumentTree, NodeId};
-use vectorless::retrieval::pilot::{
-    InterventionPoint, Pilot, PilotConfig, PilotDecision, RankedCandidate, SearchDirection,
-    SearchState,
-};
-
-/// A custom Pilot that uses simple keyword matching for guidance.
-///
-/// This demonstrates the Pilot trait implementation without requiring
-/// an actual LLM client.
-pub struct KeywordPilot {
-    config: PilotConfig,
-}
-
-impl KeywordPilot {
-    /// Create a new KeywordPilot.
-    pub fn new() -> Self {
-        Self {
-            config: PilotConfig::default(),
-        }
-    }
-
-    /// Score a node title based on keyword overlap with the query.
-    fn score_by_keywords(&self, query: &str, title: &str) -> f32 {
-        let query_lower = query.to_lowercase();
-        let title_lower = title.to_lowercase();
-
-        let query_words: HashSet<&str> = query_lower
-            .split_whitespace()
-            .filter(|w| w.len() > 2)
-            .collect();
-
-        let title_words: HashSet<&str> = title_lower
-            .split_whitespace()
-            .filter(|w| w.len() > 2)
-            .collect();
-
-        if query_words.is_empty() || title_words.is_empty() {
-            return 0.0;
-        }
-
-        let overlap = query_words.intersection(&title_words).count();
-        overlap as f32 / query_words.len().max(1) as f32
-    }
-}
-
-impl Default for KeywordPilot {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-#[async_trait]
-impl Pilot for KeywordPilot {
-    fn name(&self) -> &str {
-        "keyword_pilot"
-    }
-
-    fn should_intervene(&self, state: &SearchState<'_>) -> bool {
-        // Intervene at fork points with multiple candidates
-        if state.candidates.len() > 2 {
-            return true;
-        }
-
-        // Intervene when best score is low
-        if state.best_score < 0.3 {
-            return true;
-        }
-
-        // Intervene during backtracking
-        if state.is_backtracking {
-            return true;
-        }
-
-        false
-    }
-
-    async fn decide(&self, state: &SearchState<'_>) -> PilotDecision {
-        // Rank candidates by keyword overlap
-        let mut ranked: Vec<RankedCandidate> = state
-            .candidates
-            .iter()
-            .filter_map(|&node_id| {
-                state.tree.get(node_id).map(|node| {
-                    let score = self.score_by_keywords(state.query, &node.title);
-                    RankedCandidate::new(node_id, score)
-                })
-            })
-            .collect();
-
-        ranked.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
-
-        // Determine direction
-        let direction = if ranked.is_empty() {
-            SearchDirection::backtrack("No candidates available", vec![])
-        } else if ranked[0].score > 0.5 {
-            SearchDirection::go_deeper(format!("Strong match: {:.2}", ranked[0].score))
-        } else if ranked[0].score > 0.2 {
-            SearchDirection::go_deeper(format!("Moderate match: {:.2}", ranked[0].score))
-        } else {
-            SearchDirection::backtrack("No strong matches found", vec![])
-        };
-
-        let confidence = ranked.first().map(|c| c.score).unwrap_or(0.0);
-
-        PilotDecision {
-            ranked_candidates: ranked,
-            direction,
-            confidence,
-            reasoning: "Keyword-based decision".to_string(),
-            intervention_point: InterventionPoint::Fork,
-        }
-    }
-
-    async fn guide_start(&self, tree: &DocumentTree, query: &str) -> Option<PilotDecision> {
-        // Score root's children
-        let children = tree.children(tree.root());
-        let mut ranked: Vec<RankedCandidate> = children
-            .iter()
-            .filter_map(|&node_id| {
-                tree.get(node_id).map(|node| {
-                    let score = self.score_by_keywords(query, &node.title);
-                    RankedCandidate::new(node_id, score)
-                })
-            })
-            .collect();
-
-        ranked.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
-
-        let confidence = ranked.first().map(|c| c.score).unwrap_or(0.0);
-
-        Some(PilotDecision {
-            ranked_candidates: ranked,
-            direction: SearchDirection::go_deeper("Starting search"),
-            confidence,
-            reasoning: "Keyword-based start guidance".to_string(),
-            intervention_point: InterventionPoint::Start,
-        })
-    }
-
-    async fn guide_backtrack(&self, state: &SearchState<'_>) -> Option<PilotDecision> {
-        // Find unvisited alternatives
-        let mut alternatives = Vec::new();
-        for node_id in state.tree.children(state.tree.root()) {
-            if !state.visited.contains(&node_id) {
-                alternatives.push(node_id);
-            }
-        }
-
-        let ranked: Vec<RankedCandidate> = alternatives
-            .iter()
-            .take(5)
-            .map(|&node_id| RankedCandidate::new(node_id, 0.5))
-            .collect();
-
-        Some(PilotDecision {
-            ranked_candidates: ranked,
-            direction: SearchDirection::backtrack("Backtrack guidance", alternatives),
-            confidence: 0.5,
-            reasoning: "Suggesting alternative branches".to_string(),
-            intervention_point: InterventionPoint::Backtrack,
-        })
-    }
-
-    fn config(&self) -> &PilotConfig {
-        &self.config
-    }
-
-    fn is_active(&self) -> bool {
-        true
-    }
-
-    fn reset(&self) {
-        // No state to reset
-    }
-}
-
-fn main() {
-    println!("=== Custom Pilot Example ===\n");
-
-    // 1. Create the custom pilot
-    let pilot = KeywordPilot::new();
-    println!("Created KeywordPilot\n");
-
-    // 2. Create a sample document tree
-    let tree = create_sample_tree();
-    println!("Created sample tree with {} nodes\n", tree.node_count());
-
-    // 3. Create search state for demonstration
-    let query = "What is the architecture?";
-    let candidates: Vec<NodeId> = tree.children(tree.root());
-    let visited: HashSet<NodeId> = HashSet::new();
-    let state = SearchState::new(&tree, query, &[], &candidates, &visited);
-
-    println!("Query: \"{}\"", query);
-    println!("Candidates: {}", candidates.len());
-    println!("Should intervene: {}\n", pilot.should_intervene(&state));
-
-    // 4. Demonstrate keyword scoring
-    println!("Keyword scoring:");
-    for node_id in tree.children(tree.root()) {
-        if let Some(node) = tree.get(node_id) {
-            let score = pilot.score_by_keywords(query, &node.title);
-            println!("  - '{}': {:.2}", node.title, score);
-        }
-    }
-
-    // 5. Show how to integrate with retrieval
-    println!("\n--- Integration Example ---\n");
-    println!("To use with Engine:");
-    println!("```rust");
-    println!("use std::sync::Arc;");
-    println!("use vectorless::Engine;");
-    println!();
-    println!("let pilot = Arc::new(KeywordPilot::new());");
-    println!("let engine = Engine::builder()");
-    println!("    .with_workspace(\"./workspace\")");
-    println!("    .with_pilot(pilot)");
-    println!("    .build()");
-    println!("    .await?;");
-    println!("```");
-
-    println!("\n=== Done ===");
-}
-
-fn create_sample_tree() -> DocumentTree {
-    let mut tree = DocumentTree::new(
-        "Vectorless Documentation",
-        "A hierarchical document intelligence engine written in Rust.",
-    );
-
-    let arch = tree.add_child(
-        tree.root(),
-        "Architecture",
-        "The system consists of three main components.",
-    );
-    tree.add_child(
-        arch,
-        "Index Pipeline",
-        "Processes documents into a tree structure.",
-    );
-    tree.add_child(
-        arch,
-        "Retrieval Pipeline",
-        "Finds relevant content using multi-stage processing.",
-    );
-
-    let usage = tree.add_child(tree.root(), "Usage", "How to use the vectorless library.");
-    tree.add_child(usage, "Basic Example", "Simple usage with default configuration.");
-    tree.add_child(
-        usage,
-        "Advanced Example",
-        "Custom pipeline configuration with LLM.",
-    );
-
-    tree
-}
diff --git a/examples/rust/document_graph.rs b/examples/rust/document_graph.rs
deleted file mode 100644
index d765e3b5..00000000
--- a/examples/rust/document_graph.rs
+++ /dev/null
@@ -1,290 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Document Graph example.
-//!
-//! Demonstrates how to:
-//! 1. Build a document graph from multiple documents
-//! 2. Explore cross-document relationships (shared keywords, edges)
-//! 3. Use graph-aware retrieval with different merge strategies
-//!
-//! # What is a Document Graph?
-//!
-//! A workspace-scoped weighted graph connecting documents by shared concepts.
-//! Nodes = documents, Edges = relationships (shared keywords with weights).
-//!
-//! # Key outputs:
-//! - Document nodes with top keywords
-//! - Bidirectional edges with Jaccard similarity and shared keyword evidence
-//! - Keyword inverted index for cross-document lookup
-//! - Graph-boosted retrieval ranking
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example document_graph
-//! ```
-
-use std::collections::HashMap;
-
-use vectorless::document::{
-    DocumentGraph, DocumentGraphConfig, DocumentGraphNode, WeightedKeyword,
-};
-use vectorless::index::graph_builder::DocumentGraphBuilder;
-
-#[tokio::main]
-async fn main() {
-    println!("=== Document Graph Example ===\n");
-
-    // -------------------------------------------------------
-    // Part 1: Build the graph manually (low-level API)
-    // -------------------------------------------------------
-    println!("--- Part 1: Build Graph Manually ---\n");
-    demo_manual_graph();
-
-    // -------------------------------------------------------
-    // Part 2: Build the graph with DocumentGraphBuilder
-    // -------------------------------------------------------
-    println!("\n--- Part 2: Build Graph with Builder ---\n");
-    let graph = demo_builder();
-
-    // -------------------------------------------------------
-    // Part 3: Explore the graph
-    // -------------------------------------------------------
-    println!("\n--- Part 3: Explore the Graph ---\n");
-    demo_explore(&graph);
-
-    // -------------------------------------------------------
-    // Part 4: Keyword-based document lookup
-    // -------------------------------------------------------
-    println!("\n--- Part 4: Keyword Lookup ---\n");
-    demo_keyword_lookup(&graph);
-
-    // -------------------------------------------------------
-    // Part 5: Show graph-boosted retrieval concept
-    // -------------------------------------------------------
-    println!("\n--- Part 5: Graph-Boosted Retrieval ---\n");
-    demo_graph_boosted_retrieval(&graph);
-
-    println!("\n=== Done ===");
-}
-
-/// Manually build a small graph to show the data model.
-fn demo_manual_graph() {
-    let mut graph = DocumentGraph::new();
-
-    // Add document nodes
-    graph.add_node(DocumentGraphNode {
-        doc_id: "rust-book".to_string(),
-        title: "The Rust Programming Language".to_string(),
-        format: "md".to_string(),
-        top_keywords: vec![
-            WeightedKeyword { keyword: "ownership".to_string(), weight: 0.95 },
-            WeightedKeyword { keyword: "borrowing".to_string(), weight: 0.90 },
-            WeightedKeyword { keyword: "lifetimes".to_string(), weight: 0.80 },
-            WeightedKeyword { keyword: "traits".to_string(), weight: 0.70 },
-        ],
-        node_count: 42,
-    });
-
-    graph.add_node(DocumentGraphNode {
-        doc_id: "rust-async".to_string(),
-        title: "Async Programming in Rust".to_string(),
-        format: "md".to_string(),
-        top_keywords: vec![
-            WeightedKeyword { keyword: "async".to_string(), weight: 0.95 },
-            WeightedKeyword { keyword: "tokio".to_string(), weight: 0.85 },
-            WeightedKeyword { keyword: "lifetimes".to_string(), weight: 0.60 },
-            WeightedKeyword { keyword: "traits".to_string(), weight: 0.50 },
-        ],
-        node_count: 28,
-    });
-
-    println!("Nodes: {}", graph.node_count());
-    for doc_id in graph.doc_ids() {
-        let node = graph.get_node(doc_id).unwrap();
-        println!("  {} ({}): {} keywords, {} nodes",
-            node.doc_id, node.title, node.top_keywords.len(), node.node_count);
-    }
-}
-
-/// Build a graph from multiple documents using DocumentGraphBuilder.
-fn demo_builder() -> DocumentGraph {
-    let config = DocumentGraphConfig {
-        enabled: true,
-        min_keyword_jaccard: 0.05,
-        min_shared_keywords: 2,
-        max_keywords_per_doc: 50,
-        max_edges_per_node: 20,
-        retrieval_boost_factor: 0.15,
-    };
-
-    let mut builder = DocumentGraphBuilder::new(config);
-
-    // Document 1: Rust Language Guide
-    builder.add_document(
-        "rust-guide",
-        "Rust Language Guide",
-        "md",
-        35,
-        keywords(&[
-            ("ownership", 0.95), ("borrowing", 0.90), ("lifetimes", 0.85),
-            ("traits", 0.80), ("generics", 0.75), ("error-handling", 0.70),
-            ("pattern-matching", 0.65), ("closures", 0.60),
-        ]),
-    );
-
-    // Document 2: Async Rust (overlaps on lifetimes, traits, closures)
-    builder.add_document(
-        "async-guide",
-        "Async Rust Guide",
-        "md",
-        28,
-        keywords(&[
-            ("async", 0.95), ("tokio", 0.90), ("futures", 0.85),
-            ("lifetimes", 0.60), ("traits", 0.55), ("closures", 0.50),
-            ("pinning", 0.80), ("waker", 0.75),
-        ]),
-    );
-
-    // Document 3: Rust Testing (overlaps on traits, closures, error-handling)
-    builder.add_document(
-        "testing-guide",
-        "Rust Testing Guide",
-        "md",
-        22,
-        keywords(&[
-            ("testing", 0.95), ("assertions", 0.90), ("mocking", 0.85),
-            ("traits", 0.60), ("closures", 0.55), ("error-handling", 0.50),
-            ("benchmarks", 0.80), ("coverage", 0.75),
-        ]),
-    );
-
-    // Document 4: Unrelated document (cooking — no overlap)
-    builder.add_document(
-        "cooking",
-        "Italian Cooking",
-        "md",
-        15,
-        keywords(&[
-            ("pasta", 0.95), ("sauce", 0.90), ("olive-oil", 0.85),
-            ("garlic", 0.80), ("basil", 0.75), ("tomato", 0.70),
-        ]),
-    );
-
-    let graph = builder.build();
-
-    println!("Graph built:");
-    println!("  Documents: {}", graph.node_count());
-    println!("  Edges:     {}", graph.edge_count());
-
-    graph
-}
-
-/// Explore nodes, edges, and relationship evidence.
-fn demo_explore(graph: &DocumentGraph) {
-    for doc_id in graph.doc_ids() {
-        let node = graph.get_node(doc_id).unwrap();
-        let neighbors = graph.get_neighbors(doc_id);
-
-        println!("[{}] {} ({} nodes)", node.doc_id, node.title, node.node_count);
-
-        // Show top keywords
-        let top_3: Vec<String> = node.top_keywords.iter()
-            .take(3)
-            .map(|kw| format!("{} ({:.2})", kw.keyword, kw.weight))
-            .collect();
-        println!("  Keywords: {}", top_3.join(", "));
-
-        // Show edges to other documents
-        if neighbors.is_empty() {
-            println!("  Edges: (none — isolated document)");
-        } else {
-            println!("  Edges:");
-            for edge in neighbors {
-                println!(
-                    "    -> {} [weight={:.3}, jaccard={:.3}, shared={}]",
-                    edge.target_doc_id,
-                    edge.weight,
-                    edge.evidence.keyword_jaccard,
-                    edge.evidence.shared_keyword_count,
-                );
-                // Show shared keywords
-                let shared: Vec<String> = edge.evidence.shared_keywords.iter()
-                    .map(|sk| format!("{} ({:.2}/{:.2})", sk.keyword, sk.source_weight, sk.target_weight))
-                    .collect();
-                println!("       Shared: {}", shared.join(", "));
-            }
-        }
-        println!();
-    }
-}
-
-/// Look up documents by keyword using the inverted index.
-fn demo_keyword_lookup(graph: &DocumentGraph) {
-    let queries = ["traits", "closures", "async", "pasta", "nonexistent"];
-
-    for kw in &queries {
-        let entries = graph.find_by_keyword(kw);
-        if entries.is_empty() {
-            println!("  '{}': not found in any document", kw);
-        } else {
-            let docs: Vec<String> = entries.iter()
-                .map(|e| format!("{} ({:.2})", e.doc_id, e.weight))
-                .collect();
-            println!("  '{}': found in {}", kw, docs.join(", "));
-        }
-    }
-}
-
-/// Show how graph-boosted retrieval works conceptually.
-fn demo_graph_boosted_retrieval(graph: &DocumentGraph) {
-    println!("Scenario: User queries 'traits and closures'");
-    println!();
-
-    // Step 1: Simulate per-document scores
-    let results = vec![
-        ("rust-guide".to_string(), 0.85),
-        ("async-guide".to_string(), 0.60),
-        ("testing-guide".to_string(), 0.55),
-        ("cooking".to_string(), 0.10),
-    ];
-
-    println!("Before graph boosting:");
-    for (doc, score) in &results {
-        println!("  {}: {:.3}", doc, score);
-    }
-
-    // Step 2: Apply graph boost — high-score docs boost their neighbors
-    let boost_factor = 0.15;
-    let mut boosted = results.clone();
-    for (doc, base_score) in &results {
-        if *base_score > 0.5 {
-            for edge in graph.get_neighbors(doc) {
-                for entry in boosted.iter_mut() {
-                    if entry.0 == edge.target_doc_id {
-                        let boost = boost_factor * edge.weight * base_score;
-                        entry.1 += boost;
-                    }
-                }
-            }
-        }
-    }
-    boosted.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
-
-    println!();
-    println!("After graph boosting (boost_factor={}):", boost_factor);
-    for (doc, score) in &boosted {
-        let delta = score - results.iter().find(|(d, _)| d == doc).unwrap().1;
-        println!("  {}: {:.3} (+{:.3})", doc, score, delta);
-    }
-
-    println!();
-    println!("Effect: Related documents (rust-guide, async-guide, testing-guide)");
-    println!("  boost each other via shared keywords, while 'cooking' stays low.");
-}
-
-// Helper to build keyword maps
-fn keywords(pairs: &[(&str, f32)]) -> HashMap<String, f32> {
-    pairs.iter().map(|&(k, w)| (k.to_string(), w)).collect()
-}
diff --git a/examples/rust/events.rs b/examples/rust/events.rs
index 706454fc..7d5c99c2 100644
--- a/examples/rust/events.rs
+++ b/examples/rust/events.rs
@@ -17,7 +17,8 @@
 use std::sync::Arc;
 use std::sync::atomic::{AtomicUsize, Ordering};
 
-use vectorless::client::{EngineBuilder, EventEmitter, IndexContext, IndexEvent, QueryEvent};
+use vectorless::client::{EngineBuilder, EventEmitter, IndexContext, QueryContext};
+use vectorless::client::events::{IndexEvent, QueryEvent};
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
@@ -123,14 +124,15 @@ The event system uses handlers that can be attached to the engine builder.
     let doc_path = temp_dir.path().join("example.md");
     tokio::fs::write(&doc_path, doc_content).await?;
 
-    let doc_id = engine.index(IndexContext::from_path(&doc_path)).await?;
+    let index_result = engine.index(IndexContext::from_path(&doc_path)).await?;
+    let doc_id = index_result.doc_id().expect("index result should contain a doc_id").to_string();
     println!();
 
     // 4. Query the document (events will fire)
     println!("Step 4: Querying document (watch events)...\n");
 
     let result = engine
-        .query(&doc_id, "What features are available?")
+        .query(QueryContext::new("What features are available?").with_doc_id(&doc_id))
         .await?;
     println!();
 
diff --git a/examples/rust/feedback_learning.rs b/examples/rust/feedback_learning.rs
deleted file mode 100644
index 3848e534..00000000
--- a/examples/rust/feedback_learning.rs
+++ /dev/null
@@ -1,143 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Feedback Learning example.
-//!
-//! This example demonstrates how to use the feedback learning system
-//! to improve Pilot decision quality over time.
-//!
-//! # What you'll learn:
-//! - How to create a FeedbackStore for collecting feedback
-//! - How to integrate PilotLearner with LlmPilot
-//! - How to record user feedback for decisions
-//! - How the learner automatically adjusts decisions
-//!
-//! # Key concepts:
-//!
-//! ## Feedback Flow
-//! ```text
-//! Retrieval → Decision → User Feedback → FeedbackStore
-//!                ↑                              ↓
-//!                └──────── PilotLearner ────────┘
-//!                     (adjusts confidence)
-//! ```
-//!
-//! ## Learning Effect
-//! - High accuracy scenarios → Pilot confidence boosted
-//! - Low accuracy scenarios → Algorithm trusted more
-//! - Very low accuracy → Intervention skipped entirely
-
-use std::sync::Arc;
-use vectorless::llm::LlmClient;
-use vectorless::retrieval::pilot::{
-    FeedbackRecord, FeedbackStore, FeedbackStoreConfig, InterventionPoint, LearnerConfig,
-    PilotLearner, DecisionId, LlmPilot, PilotConfig,
-};
-
-fn main() -> Result<(), Box<dyn std::error::Error>> {
-    println!("=== Feedback Learning Example ===\n");
-
-    // 1. Create FeedbackStore with in-memory storage
-    let store = Arc::new(FeedbackStore::in_memory());
-    println!("✓ Created FeedbackStore (in-memory)");
-
-    // 2. Create Learner with custom configuration
-    let learner_config = LearnerConfig {
-        min_samples: 5,               // Need 5 samples before adjusting
-        high_accuracy_threshold: 0.8, // 80%+ accuracy = boost confidence
-        low_accuracy_threshold: 0.5,  // 50%- accuracy = reduce confidence
-        max_confidence_delta: 0.2,    // Max adjustment ±0.2
-    };
-    let learner = Arc::new(PilotLearner::with_config(store.clone(), learner_config));
-    println!("✓ Created PilotLearner with custom config");
-
-    // 3. Create LlmPilot with feedback learning
-    let client = LlmClient::for_model("gpt-4o-mini");
-    let pilot = LlmPilot::new(client, PilotConfig::default()).with_learner(learner.clone());
-    println!("✓ Created LlmPilot with feedback learner");
-
-    // 4. Simulate some retrieval operations with feedback
-    println!("\n=== Simulating Retrieval with Feedback ===\n");
-
-    // Simulate 10 retrieval operations
-    for i in 0..10 {
-        let decision_id = DecisionId(i);
-        let was_correct = i % 3 != 0; // 66% accuracy
-        let confidence = 0.7 + (i as f64 * 0.02);
-
-        // Create feedback record
-        let record = FeedbackRecord::new(
-            decision_id,
-            was_correct,
-            confidence,
-            InterventionPoint::Fork,
-            12345, // query_hash
-            67890, // path_hash
-        );
-
-        // Record feedback
-        pilot.record_feedback(record);
-
-        println!(
-            "Decision {}: {} (confidence: {:.2})",
-            i,
-            if was_correct { "✓ Correct" } else { "✗ Incorrect" },
-            confidence
-        );
-    }
-
-    // 5. View learning statistics
-    println!("\n=== Learning Statistics ===\n");
-
-    let stats = store.intervention_stats();
-    println!("Fork Point Statistics:");
-    println!("  Total decisions: {}", stats.fork.total);
-    println!("  Correct: {}", stats.fork.correct);
-    println!("  Accuracy: {:.1}%", stats.fork.accuracy() * 100.0);
-    println!(
-        "  Avg confidence (correct): {:.2}",
-        stats.fork.avg_confidence_correct
-    );
-    println!(
-        "  Avg confidence (incorrect): {:.2}",
-        stats.fork.avg_confidence_incorrect
-    );
-
-    let overall = store.overall_accuracy();
-    println!("\nOverall accuracy: {:.1}%", overall * 100.0);
-    println!("Total records: {}", store.total_records());
-
-    // 6. Check if learner has enough data
-    println!("\n=== Learner Status ===\n");
-    if learner.has_sufficient_data() {
-        println!("✓ Learner has sufficient data for adjustments");
-
-        // Get adjustment for similar context
-        let adjustment = learner.get_adjustment(InterventionPoint::Fork, 12345, 67890);
-        println!("\nAdjustment for similar context:");
-        println!("  Confidence delta: {:.3}", adjustment.confidence_delta);
-        println!("  Algorithm weight: {:.2}", adjustment.algorithm_weight);
-        println!(
-            "  Skip intervention: {}",
-            adjustment.skip_intervention
-        );
-    } else {
-        println!("✗ Learner needs more data before adjusting");
-    }
-
-    // 7. Demonstrate persistence (optional)
-    println!("\n=== Persistence (Optional) ===\n");
-
-    let persistent_config = FeedbackStoreConfig::with_persistence("/tmp/feedback.json");
-    let _persistent_store = FeedbackStore::new(persistent_config);
-
-    // In a real app, you would:
-    // - Load existing feedback at startup: persistent_store.load()?
-    // - Save periodically: persistent_store.persist()?
-
-    println!("To enable persistence, create FeedbackStore with:");
-    println!("  FeedbackStoreConfig::with_persistence(\"/path/to/feedback.json\")");
-
-    println!("\n=== Example Complete ===");
-    Ok(())
-}
diff --git a/examples/rust/html_parser.rs b/examples/rust/html_parser.rs
deleted file mode 100644
index e41aaea7..00000000
--- a/examples/rust/html_parser.rs
+++ /dev/null
@@ -1,291 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! HTML Parser Example.
-//!
-//! This example demonstrates how to parse HTML documents using vectorless.
-//!
-//! # Features
-//!
-//! - Parses HTML5 documents
-//! - Extracts heading hierarchy (h1-h6)
-//! - Extracts content from paragraphs, lists, tables
-//! - Extracts metadata from <head> (title, description, etc.)
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example html_parser
-//! ```
-
-use vectorless::parser::{DocumentParser, HtmlConfig, HtmlParser};
-
-#[tokio::main]
-async fn main() -> vectorless::Result<()> {
-    println!("=== HTML Parser Example ===\n");
-
-    // 1. Basic HTML parsing
-    println!("--- Step 1: Basic HTML Parsing ---\n");
-    demo_basic_parsing().await?;
-
-    // 2. Parsing with metadata
-    println!("\n--- Step 2: HTML with Metadata ---\n");
-    demo_metadata_parsing().await?;
-
-    // 3. Complex HTML structure
-    println!("\n--- Step 3: Complex HTML Structure ---\n");
-    demo_complex_structure().await?;
-
-    // 4. Configuration options
-    println!("\n--- Step 4: Configuration Options ---\n");
-    demo_configuration().await?;
-
-    // 5. Integration with Engine
-    println!("\n--- Step 5: Integration with Engine ---\n");
-    demo_engine_integration();
-
-    println!("\n=== Done ===");
-    Ok(())
-}
-
-/// Demonstrate basic HTML parsing.
-async fn demo_basic_parsing() -> vectorless::Result<()> {
-    let parser = HtmlParser::new();
-    let html = r#"
-<!DOCTYPE html>
-<html>
-<head><title>Basic Document</title></head>
-<body>
-    <h1>Main Title</h1>
-    <p>This is the introduction paragraph.</p>
-
-    <h2>Section 1</h2>
-    <p>Content for section 1.</p>
-
-    <h2>Section 2</h2>
-    <p>Content for section 2.</p>
-    <h3>Subsection 2.1</h3>
-    <p>Detailed content here.</p>
-</body>
-</html>
-"#;
-
-    let result = parser.parse(html).await?;
-
-    println!("Document: {}", result.meta.name);
-    println!("Nodes extracted: {}\n", result.nodes.len());
-
-    for node in &result.nodes {
-        println!("  {} {} (level {})",
-            "•".repeat(node.level),
-            node.title,
-            node.level
-        );
-        if !node.content.is_empty() {
-            let preview: String = node.content.chars().take(50).collect();
-            println!("    Content: {}...", preview);
-        }
-    }
-
-    Ok(())
-}
-
-/// Demonstrate parsing HTML with metadata.
-async fn demo_metadata_parsing() -> vectorless::Result<()> {
-    let parser = HtmlParser::new();
-    let html = r#"
-<!DOCTYPE html>
-<html>
-<head>
-    <title>Technical Documentation</title>
-    <meta name="description" content="Complete guide to the API">
-    <meta name="author" content="Documentation Team">
-    <meta name="keywords" content="API, REST, documentation">
-    <meta property="og:description" content="Open Graph description">
-</head>
-<body>
-    <h1>API Reference</h1>
-    <p>Introduction to the API.</p>
-</body>
-</html>
-"#;
-
-    let result = parser.parse(html).await?;
-
-    println!("Metadata extracted:");
-    println!("  Title: {}", result.meta.name);
-    println!("  Description: {:?}", result.meta.description);
-    println!("  Format: {:?}", result.meta.format);
-    println!("  Lines: {}", result.meta.line_count);
-
-    Ok(())
-}
-
-/// Demonstrate parsing complex HTML structure.
-async fn demo_complex_structure() -> vectorless::Result<()> {
-    let parser = HtmlParser::new();
-    let html = r#"
-<!DOCTYPE html>
-<html>
-<body>
-    <h1>Complex Document</h1>
-
-    <h2>Lists</h2>
-    <ul>
-        <li>First item</li>
-        <li>Second item</li>
-        <li>Third item</li>
-    </ul>
-
-    <ol>
-        <li>Step one</li>
-        <li>Step two</li>
-        <li>Step three</li>
-    </ol>
-
-    <h2>Table</h2>
-    <table>
-        <tr><th>Name</th><th>Value</th></tr>
-        <tr><td>Option A</td><td>100</td></tr>
-        <tr><td>Option B</td><td>200</td></tr>
-    </table>
-
-    <h2>Code Block</h2>
-    <pre><code>fn main() {
-    println!("Hello, World!");
-}</code></pre>
-
-    <h2>Blockquote</h2>
-    <blockquote>
-        This is a quoted text from another source.
-        It can span multiple lines.
-    </blockquote>
-</body>
-</html>
-"#;
-
-    let result = parser.parse(html).await?;
-
-    println!("Nodes with complex content:\n");
-    for node in &result.nodes {
-        println!("  [Level {}] {}", node.level, node.title);
-        if node.content.contains("•") || node.content.contains("1.") {
-            println!("    → Contains list content");
-        }
-        if node.content.contains("|") {
-            println!("    → Contains table content");
-        }
-        if node.content.contains("```") {
-            println!("    → Contains code block");
-        }
-        if node.content.contains(">") {
-            println!("    → Contains blockquote");
-        }
-    }
-
-    Ok(())
-}
-
-/// Demonstrate configuration options.
-async fn demo_configuration() -> vectorless::Result<()> {
-    // Default configuration
-    let _default_parser = HtmlParser::new();
-    println!("Default config:");
-    println!("  - max_heading_level: 6");
-    println!("  - include_code_blocks: true");
-    println!("  - merge_small_nodes: true");
-    println!("  - min_content_length: 50\n");
-
-    // Custom configuration
-    let config = HtmlConfig::new()
-        .with_max_heading_level(3)  // Only h1-h3
-        .with_code_blocks(false)     // Exclude code
-        .with_min_content_length(20) // Smaller threshold
-        .with_default_title("Overview");
-
-    let custom_parser = HtmlParser::with_config(config);
-    println!("Custom config:");
-    println!("  - max_heading_level: 3");
-    println!("  - include_code_blocks: false");
-    println!("  - min_content_length: 20");
-    println!("  - default_title: \"Overview\"\n");
-
-    // Parse with custom config
-    let html = r#"
-<html>
-<body>
-    <h1>Title</h1>
-    <p>Short.</p>
-    <h4>This heading is ignored (level > 3)</h4>
-    <p>This content goes to parent.</p>
-</body>
-</html>
-"#;
-
-    let result = custom_parser.parse(html).await?;
-    println!("Nodes with max_level=3: {}", result.nodes.len());
-
-    // Show preset configs
-    println!("\nPreset configurations:");
-    let simple = HtmlConfig::simple();
-    println!("  HtmlConfig::simple():");
-    println!("    - merge_small_nodes: {}", simple.merge_small_nodes);
-    println!("    - min_content_length: {}", simple.min_content_length);
-
-    let no_code = HtmlConfig::no_code_blocks();
-    println!("  HtmlConfig::no_code_blocks():");
-    println!("    - include_code_blocks: {}", no_code.include_code_blocks);
-
-    Ok(())
-}
-
-/// Demonstrate integration with Engine.
-fn demo_engine_integration() {
-    println!("Integration with Engine:\n");
-
-    println!("```rust");
-    println!("use vectorless::{{EngineBuilder, IndexContext}};");
-    println!("use vectorless::parser::DocumentFormat;");
-    println!();
-    println!("# #[tokio::main]");
-    println!("# async fn main() -> vectorless::Result<()> {{");
-    println!("    let engine = EngineBuilder::new()");
-    println!("        .with_workspace(\"./workspace\")");
-    println!("        .build()");
-    println!("        .await?;");
-    println!();
-    println!("    // Method 1: From HTML file");
-    println!("    let doc_id = engine.index(");
-    println!("        IndexContext::from_path(\"./documentation.html\")");
-    println!("    ).await?;");
-    println!();
-    println!("    // Method 2: From HTML content");
-    println!("    let html = r#\"");
-    println!("<html>");
-    println!("<head><title>My Doc</title></head>");
-    println!("<body>");
-    println!("    <h1>Introduction</h1>");
-    println!("    <p>Content here...</p>");
-    println!("</body>");
-    println!("</html>");
-    println!("\"#;");
-    println!();
-    println!("    let doc_id = engine.index(");
-    println!("        IndexContext::from_content(html, DocumentFormat::Html)");
-    println!("            .with_name(\"my-document\")");
-    println!("    ).await?;");
-    println!();
-    println!("    // Query the indexed document");
-    println!("    let result = engine.query(&doc_id, \"What is the introduction?\").await?;");
-    println!("    println!(\"{{}}\", result.content);");
-    println!();
-    println!("    Ok(())");
-    println!("}}");
-    println!("```\n");
-
-    println!("Supported file extensions:");
-    println!("  - .html, .htm → HTML format");
-    println!("  - .md, .markdown → Markdown format");
-    println!("  - .pdf → PDF format");
-    println!("  - .docx → Word document");
-}
diff --git a/examples/rust/index.rs b/examples/rust/index.rs
deleted file mode 100644
index bd2b6aac..00000000
--- a/examples/rust/index.rs
+++ /dev/null
@@ -1,106 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Index example - demonstrates the document indexing.
-//!
-//! This example shows how to:
-//! 1. Create an index pipeline executor
-//! 2. Configure pipeline options
-//! 3. Execute the pipeline on a document
-//! 4. Inspect the generated document tree
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example index
-//! ```
-
-use vectorless::index::{IndexInput, PipelineExecutor, PipelineOptions};
-
-#[tokio::main]
-async fn main() -> vectorless::Result<()> {
-    println!("=== Index Pipeline Example ===\n");
-
-    // 1. Create pipeline executor
-    let mut executor = PipelineExecutor::new();
-    println!("✓ Created pipeline executor\n");
-
-    // 2. Configure pipeline options
-    let options = PipelineOptions::default();
-    println!("Pipeline options:");
-    println!("  - Generate IDs: {}", options.generate_ids);
-    println!("  - Generate description: {}", options.generate_description);
-    println!();
-
-    // 3. Create input from a file
-    let input = IndexInput::file("./README.md");
-    println!("Input: ./README.md\n");
-
-    // 4. Execute the pipeline
-    println!("Executing pipeline stages:");
-    println!("  [1/5] Parse     - Parse document into tree structure");
-    println!("  [2/5] Build     - Build document tree with metadata");
-    println!("  [3/5] Enhance   - Add ToC and section detection");
-    println!("  [4/5] Enrich    - Generate summaries for nodes");
-    println!("  [5/5] Optimize  - Optimize tree structure");
-    println!();
-
-    let result = executor.execute(input, options).await?;
-    println!("✓ Pipeline completed\n");
-
-    // 5. Inspect the result
-    println!("Results:");
-    println!("  - Document name: {}", result.name);
-
-    if let Some(ref description) = result.description {
-        let preview: String = description.chars().take(100).collect();
-        println!("  - Description: {}...", preview);
-    }
-
-    if let Some(ref tree) = result.tree {
-        println!("  - Tree nodes: {}", tree.node_count());
-        println!();
-
-        // Print tree structure (first 2 levels)
-        println!("Document structure:");
-        print_tree_structure(&tree, tree.root(), 0, 2);
-    }
-
-    if let Some(page_count) = result.page_count {
-        println!("\n  - Pages: {}", page_count);
-    }
-
-    println!("\n=== Done ===");
-    Ok(())
-}
-
-/// Print tree structure up to a maximum depth.
-fn print_tree_structure(
-    tree: &vectorless::document::DocumentTree,
-    node_id: vectorless::document::NodeId,
-    current_depth: usize,
-    max_depth: usize,
-) {
-    if current_depth > max_depth {
-        return;
-    }
-
-    let indent = "  ".repeat(current_depth);
-
-    if let Some(node) = tree.get(node_id) {
-        let children = tree.children(node_id);
-        let marker = if children.is_empty() {
-            "└─"
-        } else {
-            "├─"
-        };
-        println!(
-            "{}{} {} (depth: {})",
-            indent, marker, node.title, node.depth
-        );
-
-        for child_id in children {
-            print_tree_structure(tree, child_id, current_depth + 1, max_depth);
-        }
-    }
-}
diff --git a/examples/rust/markdownflow.rs b/examples/rust/markdownflow.rs
index 60e96f54..7d7988c0 100644
--- a/examples/rust/markdownflow.rs
+++ b/examples/rust/markdownflow.rs
@@ -19,8 +19,8 @@
 //! OPENAI_API_KEY=sk-... cargo run --example markdown_flow
 //! ```
 
-use vectorless::Engine;
-use vectorless::client::{IndexContext, IndexOptions};
+use vectorless::EngineBuilder;
+use vectorless::client::{IndexContext, IndexOptions, QueryContext};
 
 /// Sample markdown content for demonstration.
 const SAMPLE_MARKDOWN: &str = r#"
@@ -43,7 +43,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Step 1: Create a Vectorless client (no API key needed - LLM config is automatic)
     println!("Step 1: Creating Vectorless client...");
 
-    let client = Engine::builder()
+    let client = EngineBuilder::new()
         .with_workspace("./workspace")
         .build()
         .await
@@ -62,29 +62,19 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     // Check if we should generate summaries (requires API key)
     println!("  - API key detected, generating summaries...");
-    let doc_id = client
+    let index_result = client
         .index(IndexContext::from_path(&md_path).with_options(IndexOptions::new().with_summaries()))
         .await?;
+    let doc_id = index_result.doc_id().unwrap().to_string();
 
     println!("  - Document indexed successfully");
     println!("  - Document ID: {}", doc_id);
     println!();
 
-    // Step 3: Show document structure in JSON format
-    println!("Step 3: Document structure (JSON):");
-    println!();
-
-    match client.get_structure(&doc_id).await {
-        Ok(tree) => {
-            // Export to JSON format (PageIndex compatible)
-            let structure = tree.to_structure_json("sample.md");
-            let json = serde_json::to_string_pretty(&structure)
-                .unwrap_or_else(|_| "Failed to serialize".to_string());
-            println!("{}", json);
-        }
-        Err(e) => {
-            println!("  - Error getting structure: {}", e);
-        }
+    // Step 3: List indexed documents
+    println!("Step 3: Indexed documents:");
+    for doc in client.list().await? {
+        println!("  - {} ({})", doc.name, doc.id);
     }
     println!();
 
@@ -96,7 +86,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     for query in queries {
         println!("  Query: \"{}\"", query);
 
-        match client.query(&doc_id, query).await {
+        match client.query(QueryContext::new(query).with_doc_id(&doc_id)).await {
             Ok(result) => {
                 if result.content.is_empty() {
                     println!("    - No relevant content found");
diff --git a/examples/rust/memo_cache.rs b/examples/rust/memo_cache.rs
deleted file mode 100644
index d4655189..00000000
--- a/examples/rust/memo_cache.rs
+++ /dev/null
@@ -1,264 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! MemoStore verification example.
-//!
-//! This example demonstrates the LLM memoization system working in a real scenario,
-//! showing cache hits/misses and cost savings.
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example memo_cache
-//! ```
-//!
-//! # Environment
-//!
-//! Set OPENAI_API_KEY or ANTHROPIC_API_KEY for full functionality.
-//! The example will still run without API keys (using fallback mode).
-
-use chrono::Duration;
-use vectorless::memo::{MemoKey, MemoOpType, MemoStore, MemoValue};
-
-fn print_separator(title: &str) {
-    println!("\n{}", "=".repeat(60));
-    println!("  {}", title);
-    println!("{}", "=".repeat(60));
-}
-
-fn main() -> vectorless::Result<()> {
-    println!("=== MemoStore Verification Example ===\n");
-
-    // ============================================================
-    // Part 1: Basic MemoStore Operations
-    // ============================================================
-    print_separator("Part 1: Basic Operations");
-
-    let store = MemoStore::new()
-        .with_ttl(Duration::days(7))
-        .with_model("gpt-4o")
-        .with_version(1);
-
-    println!("Created MemoStore with:");
-    println!("  - TTL: 7 days");
-    println!("  - Model: gpt-4o");
-    println!("  - Version: 1");
-
-    // Create a summary cache key
-    let content = "This is a long document about machine learning...";
-    let content_fp = vectorless::utils::fingerprint::Fingerprint::from_str(content);
-    let key = MemoKey::summary(&content_fp).with_model("gpt-4o").with_version(1);
-
-    println!("\nCache key created:");
-    println!("  - Op type: {:?}", key.op_type);
-    println!("  - Input FP: {}", key.input_fp);
-
-    // Check cache (should miss)
-    println!("\nChecking cache (first time)...");
-    let cached = store.get(&key);
-    println!("  Cache hit: {}", cached.is_some());
-
-    // Store a value
-    println!("\nStoring summary...");
-    let summary = "Machine learning is a subset of AI that enables systems to learn from data.";
-    store.put_with_tokens(key.clone(), MemoValue::Summary(summary.to_string()), 500);
-    println!("  Stored: \"{}\"", summary);
-    println!("  Tokens saved estimate: 500");
-
-    // Check cache again (should hit)
-    println!("\nChecking cache (second time)...");
-    let cached = store.get(&key);
-    println!("  Cache hit: {}", cached.is_some());
-    if let Some(value) = cached {
-        println!("  Value: \"{}\"", value.as_summary().unwrap_or("(not a summary)"));
-    }
-
-    // ============================================================
-    // Part 2: Statistics Tracking
-    // ============================================================
-    print_separator("Part 2: Statistics Tracking");
-
-    // Create a new store for this demo
-    let store = MemoStore::with_capacity(100)
-        .with_model("gpt-4o-mini");
-
-    println!("Simulating cache usage...\n");
-
-    // Simulate 10 operations
-    let operations = [
-        ("doc1", "Content about Rust programming"),
-        ("doc2", "Introduction to machine learning"),
-        ("doc1", "Content about Rust programming"), // Repeat - should hit
-        ("doc3", "Deep learning fundamentals"),
-        ("doc2", "Introduction to machine learning"), // Repeat - should hit
-        ("doc1", "Content about Rust programming"), // Repeat - should hit
-        ("doc4", "Natural language processing"),
-        ("doc3", "Deep learning fundamentals"), // Repeat - should hit
-        ("doc5", "Computer vision basics"),
-        ("doc2", "Introduction to machine learning"), // Repeat - should hit
-    ];
-
-    let mut hits = 0u64;
-    let mut misses = 0u64;
-
-    for (i, (doc_id, content)) in operations.iter().enumerate() {
-        let content_fp = vectorless::utils::fingerprint::Fingerprint::from_str(content);
-        let key = MemoKey::summary(&content_fp);
-
-        if let Some(_value) = store.get(&key) {
-            hits += 1;
-            println!("  [{:2}] {} - CACHE HIT", i + 1, doc_id);
-        } else {
-            misses += 1;
-            println!("  [{:2}] {} - cache miss (storing...)", i + 1, doc_id);
-            store.put_with_tokens(key, MemoValue::Summary(format!("Summary of {}", content)), 100);
-        }
-    }
-
-    println!("\nStatistics:");
-    println!("  - Hits: {}", hits);
-    println!("  - Misses: {}", misses);
-    println!("  - Hit rate: {:.1}%", (hits as f64 / (hits + misses) as f64) * 100.0);
-
-    // ============================================================
-    // Part 3: Cache Invalidation
-    // ============================================================
-    print_separator("Part 3: Cache Invalidation");
-
-    let store = MemoStore::new().with_model("gpt-4o");
-
-    // Store different operation types
-    let fp1 = vectorless::utils::fingerprint::Fingerprint::from_str("content1");
-    let fp2 = vectorless::utils::fingerprint::Fingerprint::from_str("content2");
-
-    store.put(MemoKey::summary(&fp1), MemoValue::Summary("Summary 1".to_string()));
-    store.put(MemoKey::summary(&fp2), MemoValue::Summary("Summary 2".to_string()));
-    store.put(
-        MemoKey::pilot_decision(&fp1, &fp2),
-        MemoValue::PilotDecision(vectorless::memo::PilotDecisionValue {
-            selected_idx: 0,
-            confidence: 0.9,
-            reasoning: "Test decision".to_string(),
-        }),
-    );
-
-    println!("Stored 3 entries:");
-    println!("  - 2 Summary entries");
-    println!("  - 1 PilotDecision entry");
-    println!("  - Total: {} entries", store.len());
-
-    // Invalidate by operation type
-    println!("\nInvalidating all Summary entries...");
-    let removed = store.invalidate_by_op_type(MemoOpType::Summary);
-    println!("  Removed: {} entries", removed);
-    println!("  Remaining: {} entries", store.len());
-
-    // ============================================================
-    // Part 4: Persistence
-    // ============================================================
-    print_separator("Part 4: Persistence");
-
-    let temp_dir = tempfile::TempDir::new().expect("Failed to create temp dir");
-    let cache_path = temp_dir.path().join("memo_cache.json");
-
-    println!("Cache path: {:?}", cache_path);
-
-    // Create and populate store
-    let store = MemoStore::new().with_model("gpt-4o");
-
-    for i in 0..5 {
-        let content = format!("Document content {}", i);
-        let fp = vectorless::utils::fingerprint::Fingerprint::from_str(&content);
-        store.put(
-            MemoKey::summary(&fp),
-            MemoValue::Summary(format!("Summary {}", i)),
-        );
-    }
-    println!("Created store with {} entries", store.len());
-
-    // Note: save/load are async, skip for this sync example
-    println!("\n(Async save/load skipped in sync example)");
-    println!("Use store.save(&path).await and store.load(&path).await in async context");
-
-    // ============================================================
-    // Part 5: Real-World Scenario Simulation
-    // ============================================================
-    print_separator("Part 5: Real-World Scenario");
-
-    println!("Simulating a document query session...\n");
-
-    let store = MemoStore::new()
-        .with_ttl(Duration::hours(24))
-        .with_model("gpt-4o-mini");
-
-    // Simulate multiple queries to the same document
-    let document_content = r#"
-        # Vectorless Documentation
-
-        Vectorless is a hierarchical, reasoning-native document intelligence engine.
-        It provides tree-based document understanding without vector databases.
-
-        ## Features
-        - Multi-format parsing (Markdown, PDF, DOCX)
-        - LLM-powered summarization
-        - Adaptive retrieval strategies
-    "#;
-
-    let doc_fp = vectorless::utils::fingerprint::Fingerprint::from_str(document_content);
-
-    // Simulate query context fingerprints
-    let queries = [
-        ("What is Vectorless?", 0.85),
-        ("How does it work?", 0.72),
-        ("What formats are supported?", 0.91),
-        ("What is Vectorless?", 0.85),  // Repeat
-        ("How does it work?", 0.72),    // Repeat
-    ];
-
-    println!("Processing {} queries...\n", queries.len());
-
-    for (i, (query, confidence)) in queries.iter().enumerate() {
-        let query_fp = vectorless::utils::fingerprint::Fingerprint::from_str(query);
-        let key = MemoKey::pilot_decision(&doc_fp, &query_fp);
-
-        if let Some(_value) = store.get(&key) {
-            println!("  [{:2}] \"{}\" - CACHED (confidence: {:.2})", i + 1, query, confidence);
-        } else {
-            println!("  [{:2}] \"{}\" - Computing... (confidence: {:.2})", i + 1, query, confidence);
-            store.put_with_tokens(
-                key,
-                MemoValue::PilotDecision(vectorless::memo::PilotDecisionValue {
-                    selected_idx: 0,
-                    confidence: *confidence as f32,
-                    reasoning: format!("Reasoning for: {}", query),
-                }),
-                150, // ~150 tokens per pilot decision
-            );
-        }
-    }
-
-    // Final statistics
-    // Note: get() updates entry-level hits, but global stats are only
-    // updated by get_or_compute(). For accurate global stats, use get_or_compute.
-    println!("\n=== Final Statistics ===");
-    println!("  Cache entries: {}", store.len());
-    println!("\nNote: Global stats (hits/misses/tokens_saved) are tracked by");
-    println!("get_or_compute(), not by direct get() calls. For accurate tracking,");
-    println!("use get_or_compute() in production code.");
-
-    // Cost estimation (based on manual tracking above)
-    let manual_hits = 2u64; // Queries 4 and 5 were cache hits
-    let tokens_per_decision = 150u64;
-    let tokens_saved = manual_hits * tokens_per_decision;
-    let cost_per_1k_tokens = 0.0015; // GPT-4o-mini input
-    let saved_cost = (tokens_saved as f64 / 1000.0) * cost_per_1k_tokens;
-    println!("\n  Manual calculation:");
-    println!("    Cache hits: {}", manual_hits);
-    println!("    Tokens saved: {}", tokens_saved);
-    println!("    Estimated cost saved: ${:.4}", saved_cost);
-
-    println!("\n=== Verification Complete ===");
-    println!("MemoStore is working correctly!");
-
-    Ok(())
-}
diff --git a/examples/rust/multi_format.rs b/examples/rust/multi_format.rs
deleted file mode 100644
index f146b851..00000000
--- a/examples/rust/multi_format.rs
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Multi-format document processing example.
-//!
-//! This example demonstrates how to work with different
-//! document formats (Markdown, PDF, DOCX, HTML).
-//!
-//! # What you'll learn:
-//! - How to index documents of different formats
-//! - How format detection works
-//! - How to configure format-specific parsing options
-//! - How to handle mixed-format document sets
-//!
-//! # Supported formats:
-//! - **Markdown** (.md): Full support with ToC extraction
-//! - **PDF** (.pdf): Text extraction, structure inference
-//! - **DOCX** (.docx): Word document parsing
-//! - **HTML** (.html, .htm): Web page parsing (planned)
-//! - **Plain text** (.txt): Basic text parsing (planned)
-//!
-//! # Format-specific considerations:
-//!
-//! ## Markdown
-//! - Best format for structured documents
-//! - Automatic heading hierarchy detection
-//! - Code block handling
-//!
-//! ## PDF
-//! - Text extraction quality varies
-//! - No explicit structure (inferred from fonts/spacing)
-//! - Tables and images not supported
-//!
-//! ## DOCX
-//! - Good structure preservation
-//! - Styles mapped to hierarchy
-//! - Limited formatting support
-//!
-//! # TODO: Implementation steps
-//!
-//! 1. Detect document format from extension or content
-//! 2. Configure format-specific parser options
-//! 3. Index documents of mixed formats
-//! 4. Query across all formats
-
-// TODO: Implement multi-format example
-// ```
-// use vectorless::client::{Engine, EngineBuilder};
-// use vectorless::parser::DocumentFormat;
-//
-// async fn index_multiple_formats(engine: &Engine) {
-//     // Index different formats
-//     let md_doc = engine.index("./README.md").await?;
-//     let pdf_doc = engine.index("./paper.pdf").await?;
-//     let docx_doc = engine.index("./report.docx").await?;
-//
-//     // Query works across all formats
-//     let result = engine.query(&md_doc, "What is this about?").await?;
-// }
-// ```
-
-fn main() {
-    // TODO: Show multi-format indexing and querying
-    //
-    // // Index documents of different formats
-    // let md_id = engine.index("./docs/guide.md").await?;
-    // let pdf_id = engine.index("./docs/paper.pdf").await?;
-    // let docx_id = engine.index("./docs/report.docx").await?;
-    //
-    // // Each can be queried independently
-    // for doc_id in &[md_id, pdf_id, docx_id] {
-    //     let result = engine.query(doc_id, "summary").await?;
-    //     println!("Result: {}", result.content);
-    // }
-
-    println!("TODO: Implement multi_format example");
-}
diff --git a/examples/rust/reference_following.rs b/examples/rust/reference_following.rs
deleted file mode 100644
index 1f95cbf8..00000000
--- a/examples/rust/reference_following.rs
+++ /dev/null
@@ -1,191 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Reference Following Example
-//!
-//! This example demonstrates the reference following feature which allows
-//! the retrieval system to follow in-document references like
-//! "see Appendix G" or "refer to Table 5.3".
-//!
-//! # What you'll learn:
-//! - How references are extracted from document content
-//! - How references are resolved to actual nodes
-//! - How to use ReferenceFollower to expand search results
-//!
-//! # Key concepts:
-//!
-//! ## Reference Types
-//! - Section: "see Section 2.1", "Section 3.2.1"
-//! - Appendix: "see Appendix G", "Appendix A"
-//! - Table: "Table 5.3", "refer to Table 1"
-//! - Figure: "Figure 2.1", "fig. 3"
-//! - Page: "see page 42", "p. 15"
-//!
-//! ## Resolution Flow
-//! ```text
-//! Extract References → Resolve to Nodes → Follow → Expand Context
-//! ```
-
-use vectorless::document::{
-    DocumentTree, ReferenceExtractor,
-};
-use vectorless::retrieval::{
-    expand_with_references, ReferenceConfig, ReferenceFollower,
-};
-
-fn main() {
-    println!("=== Reference Following Example ===\n");
-
-    // 1. Create a document tree with references
-    let tree = create_document_with_references();
-    println!("Created document tree with {} nodes\n", tree.node_count());
-
-    // 2. Build retrieval index
-    let index = tree.build_retrieval_index();
-    println!("Built retrieval index\n");
-
-    // 3. Demonstrate reference extraction
-    println!("--- Reference Extraction ---\n");
-
-    let content = "For more details, see Section 2.1 and Appendix G. The data is shown in Table 5.3.";
-    let refs = ReferenceExtractor::extract(content);
-
-    println!("Content: \"{}\"\n", content);
-    println!("Extracted {} references:", refs.len());
-    for r#ref in &refs {
-        println!(
-            "  - {:?}: '{}' -> target '{}'",
-            r#ref.ref_type, r#ref.ref_text, r#ref.target_id
-        );
-    }
-    println!();
-
-    // 4. Demonstrate reference resolution
-    println!("--- Reference Resolution ---\n");
-
-    let resolved_refs = ReferenceExtractor::extract_and_resolve(content, &tree, &index);
-    println!("Resolved references:");
-    for r#ref in &resolved_refs {
-        let status = if r#ref.is_resolved() {
-            format!("resolved (confidence: {:.2})", r#ref.confidence)
-        } else {
-            "unresolved".to_string()
-        };
-        println!(
-            "  - {:?}: '{}' -> {}",
-            r#ref.ref_type, r#ref.target_id, status
-        );
-    }
-    println!();
-
-    // 5. Demonstrate reference following
-    println!("--- Reference Following ---\n");
-
-    let config = ReferenceConfig {
-        max_depth: 3,
-        max_references: 10,
-        follow_pages: true,
-        follow_tables_figures: true,
-        min_confidence: 0.3,
-        ..Default::default()
-    };
-    let follower = ReferenceFollower::new(config);
-
-    // Get the financial section node (which contains references)
-    let financial_node = find_node_by_title(&tree, "Financial Summary");
-    if let Some(node_id) = financial_node {
-        let followed = follower.follow_from_node(&tree, &index, node_id);
-
-        println!("Following references from 'Financial Summary':");
-        for f in &followed {
-            let target = if let Some(target_id) = f.target_node {
-                let title = tree.get(target_id).map(|n| n.title.as_str()).unwrap_or("?");
-                format!("-> '{}' (depth {})", title, f.depth)
-            } else {
-                "-> (unresolved)".to_string()
-            };
-            println!(
-                "  - {:?} '{}' {}",
-                f.reference.ref_type, f.reference.target_id, target
-            );
-        }
-    }
-    println!();
-
-    // 6. Demonstrate expansion with references
-    println!("--- Expansion with References ---\n");
-
-    let initial_nodes: Vec<_> = tree.children(tree.root());
-    println!("Initial nodes: {} (root's children)", initial_nodes.len());
-
-    let expansion = expand_with_references(&tree, &index, &initial_nodes, None);
-
-    println!(
-        "After reference expansion: {} total nodes, {} new",
-        expansion.all_nodes().len(),
-        expansion.expanded_nodes.len()
-    );
-
-    if expansion.has_expansion() {
-        println!("\nExpanded nodes:");
-        for node_id in expansion.new_nodes() {
-            if let Some(node) = tree.get(*node_id) {
-                println!("  - {}", node.title);
-            }
-        }
-    }
-    println!();
-
-    // 7. Show configuration options
-    println!("--- Configuration Options ---\n");
-
-    let conservative = ReferenceConfig::conservative();
-    let aggressive = ReferenceConfig::aggressive();
-
-    println!("Conservative config:");
-    println!("  - Max depth: {}", conservative.max_depth);
-    println!("  - Max references: {}", conservative.max_references);
-
-    println!("\nAggressive config:");
-    println!("  - Max depth: {}", aggressive.max_depth);
-    println!("  - Max references: {}", aggressive.max_references);
-
-    println!("\n=== Done ===");
-}
-
-fn create_document_with_references() -> DocumentTree {
-    let mut tree = DocumentTree::new("Annual Report", "Company annual financial report.");
-
-    // Main sections
-    let _intro = tree.add_child(tree.root(), "Introduction", "Overview of the report.");
-    let financial = tree.add_child(
-        tree.root(),
-        "Financial Summary",
-        "Financial overview for 2023. For detailed breakdown, see Section 2.1. Revenue data is in Table 5.3. Additional details in Appendix G.",
-    );
-    let _appendix = tree.add_child(
-        tree.root(),
-        "Appendix G",
-        "Detailed financial tables and data.",
-    );
-
-    // Subsections
-    tree.add_child(
-        financial,
-        "2.1 Revenue",
-        "Revenue increased by 15% year over year. See Table 5.3 for breakdown.",
-    );
-
-    tree
-}
-
-fn find_node_by_title(tree: &DocumentTree, title: &str) -> Option<vectorless::document::NodeId> {
-    for node_id in tree.traverse() {
-        if let Some(node) = tree.get(node_id) {
-            if node.title == title {
-                return Some(node_id);
-            }
-        }
-    }
-    None
-}
diff --git a/examples/rust/retrieve.rs b/examples/rust/retrieve.rs
deleted file mode 100644
index 62e5ff73..00000000
--- a/examples/rust/retrieve.rs
+++ /dev/null
@@ -1,263 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Retrieve example - demonstrates the retrieval pipeline.
-//!
-//! This example shows how to:
-//! 1. Create a pipeline retriever
-//! 2. Configure retrieval options
-//! 3. Execute retrieval queries
-//! 4. Use the orchestrator for advanced control
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example retrieve
-//! ```
-
-use std::sync::Arc;
-use vectorless::document::DocumentTree;
-use vectorless::retrieval::{
-    PipelineRetriever, RetrieveOptions, Retriever, StrategyPreference,
-    pipeline::RetrievalOrchestrator,
-    stages::{AnalyzeStage, EvaluateStage, PlanStage, SearchStage},
-};
-
-#[tokio::main]
-async fn main() -> vectorless::Result<()> {
-    println!("=== Retrieval Pipeline Example ===\n");
-
-    // 1. Create a sample document tree
-    let tree = create_sample_tree();
-    println!(
-        "✓ Created sample document tree ({} nodes)\n",
-        tree.node_count()
-    );
-
-    // 2. Method A: Use PipelineRetriever (simple API)
-    println!("--- Method A: PipelineRetriever (Simple API) ---\n");
-    demo_pipeline_retriever(&tree).await?;
-
-    // 3. Method B: Use RetrievalOrchestrator directly (advanced API)
-    println!("\n--- Method B: RetrievalOrchestrator (Advanced API) ---\n");
-    demo_orchestrator(&tree).await?;
-
-    println!("\n=== Done ===");
-    Ok(())
-}
-
-/// Demonstrate PipelineRetriever (simple API).
-async fn demo_pipeline_retriever(tree: &DocumentTree) -> vectorless::Result<()> {
-    // Create retriever with configuration
-    let retriever = PipelineRetriever::new()
-        .with_max_backtracks(5)
-        .with_max_iterations(10);
-
-    println!("PipelineRetriever configuration:");
-    println!("  - Max backtracks: 5");
-    println!("  - Max iterations: 10");
-    println!();
-
-    // Configure retrieval options
-    let options = RetrieveOptions {
-        top_k: 5,
-        beam_width: 3,
-        max_iterations: 5,
-        max_tokens: 4000,
-        sufficiency_check: true,
-        include_content: true,
-        include_summaries: true,
-        strategy: StrategyPreference::Auto,
-        ..Default::default()
-    };
-
-    println!("RetrieveOptions:");
-    println!("  - Top K: {}", options.top_k);
-    println!("  - Beam width: {}", options.beam_width);
-    println!("  - Max tokens: {}", options.max_tokens);
-    println!("  - Sufficiency check: {}", options.sufficiency_check);
-    println!();
-
-    // Execute query
-    let query = "What is the main architecture?";
-    println!("Query: \"{}\"\n", query);
-
-    let response = retriever
-        .retrieve(tree, query, &options)
-        .await
-        .map_err(|e| vectorless::Error::Retrieval(e.to_string()))?;
-
-    // Display results
-    println!("Results:");
-    println!("  - Is sufficient: {}", response.is_sufficient);
-    println!("  - Confidence: {:.2}", response.confidence);
-    println!("  - Strategy used: {}", response.strategy_used);
-    println!("  - Tokens used: {}", response.tokens_used);
-    println!("  - Results count: {}", response.results.len());
-
-    if !response.results.is_empty() {
-        println!("\n  Top results:");
-        for (i, result) in response.results.iter().take(3).enumerate() {
-            println!(
-                "    {}. {} (score: {:.2})",
-                i + 1,
-                result.title,
-                result.score
-            );
-        }
-    }
-
-    Ok(())
-}
-
-/// Demonstrate RetrievalOrchestrator (advanced API).
-async fn demo_orchestrator(tree: &DocumentTree) -> vectorless::Result<()> {
-    // Build orchestrator with explicit stages
-    let mut orchestrator = RetrievalOrchestrator::new()
-        .with_max_backtracks(3)
-        .with_max_iterations(5)
-        .stage(AnalyzeStage::new())
-        .stage(PlanStage::new())
-        .stage(SearchStage::new())
-        .stage(EvaluateStage::new());
-
-    println!("Orchestrator stages:");
-    if let Ok(names) = orchestrator.stage_names() {
-        for (i, name) in names.iter().enumerate() {
-            println!("  {}. {}", i + 1, name);
-        }
-    }
-    println!();
-
-    // Get execution groups (shows parallel potential)
-    if let Ok(groups) = orchestrator.get_execution_groups() {
-        println!("Execution groups: {} groups", groups.len());
-        for (i, group) in groups.iter().enumerate() {
-            let parallel = if group.parallel {
-                " (can parallelize)"
-            } else {
-                ""
-            };
-            println!(
-                "  Group {}: {} stages{}",
-                i,
-                group.stage_indices.len(),
-                parallel
-            );
-        }
-    }
-    println!();
-
-    // Execute query
-    let query = "How does the pipeline work?";
-    println!("Query: \"{}\"\n", query);
-
-    let options = RetrieveOptions::default();
-    let tree_arc = Arc::new(tree.clone());
-    let response = orchestrator
-        .execute(tree_arc, query, options)
-        .await
-        .map_err(|e| vectorless::Error::Retrieval(e.to_string()))?;
-
-    println!("Results:");
-    println!("  - Is sufficient: {}", response.is_sufficient);
-    println!("  - Confidence: {:.2}", response.confidence);
-    println!("  - Complexity: {:?}", response.complexity);
-    println!("  - Reasoning steps: {}", response.reasoning_chain.len());
-
-    if !response.reasoning_chain.is_empty() {
-        println!("\n  Reasoning chain:");
-        for (i, step) in response.reasoning_chain.steps.iter().take(5).enumerate() {
-            let title = step.title.as_deref().unwrap_or("(no node)");
-            println!(
-                "    {}. [{}] {} (score: {:.2}): {}",
-                i + 1, step.stage, title, step.score, step.reasoning
-            );
-        }
-    }
-
-    Ok(())
-}
-
-/// Create a sample document tree for demonstration.
-fn create_sample_tree() -> DocumentTree {
-    let mut tree = DocumentTree::new(
-        "Vectorless Documentation",
-        "A hierarchical document intelligence engine written in Rust.",
-    );
-
-    // Add sections using the correct API
-    let _intro = tree.add_child(
-        tree.root(),
-        "Introduction",
-        "Vectorless is a document intelligence engine written in Rust.",
-    );
-
-    let arch = tree.add_child(
-        tree.root(),
-        "Architecture",
-        "The system consists of three main components: indexer, retriever, and storage.",
-    );
-
-    let index_section = tree.add_child(
-        arch,
-        "Index Pipeline",
-        "The index pipeline processes documents into a tree structure with summaries.",
-    );
-    let retrieve_section = tree.add_child(
-        arch,
-        "Retrieval Pipeline",
-        "The retrieval pipeline finds relevant content using multi-stage processing.",
-    );
-
-    tree.add_child(
-        index_section,
-        "Parse Stage",
-        "Parses documents (Markdown, PDF, DOCX) into structured content.",
-    );
-    tree.add_child(
-        index_section,
-        "Build Stage",
-        "Builds the document tree with metadata like page numbers and indices.",
-    );
-    tree.add_child(
-        index_section,
-        "Enrich Stage",
-        "Generates AI summaries for tree nodes using LLM.",
-    );
-
-    tree.add_child(
-        retrieve_section,
-        "Analyze Stage",
-        "Analyzes query complexity and extracts keywords for matching.",
-    );
-    tree.add_child(
-        retrieve_section,
-        "Plan Stage",
-        "Selects retrieval strategy (keyword/semantic/LLM) and search algorithm.",
-    );
-    tree.add_child(
-        retrieve_section,
-        "Search Stage",
-        "Executes tree traversal (greedy/beam/MCTS) to find relevant content.",
-    );
-    tree.add_child(
-        retrieve_section,
-        "Judge Stage",
-        "Evaluates sufficiency of collected content, can trigger backtracking.",
-    );
-
-    let usage = tree.add_child(tree.root(), "Usage", "How to use the vectorless library.");
-    tree.add_child(
-        usage,
-        "Basic Example",
-        "Simple usage with default configuration and workspace.",
-    );
-    tree.add_child(
-        usage,
-        "Advanced Example",
-        "Custom pipeline configuration with LLM and custom stages.",
-    );
-
-    tree
-}
diff --git a/examples/rust/session.rs b/examples/rust/session.rs
deleted file mode 100644
index d5cfd68d..00000000
--- a/examples/rust/session.rs
+++ /dev/null
@@ -1,205 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Session-based multi-document operations example.
-//!
-//! This example demonstrates the Session API for:
-//! - Managing multiple documents in a single session
-//! - Cross-document queries
-//! - Session caching for improved performance
-//! - Session statistics
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example session
-//! ```
-
-use vectorless::client::{EngineBuilder, IndexContext};
-
-#[tokio::main]
-async fn main() -> Result<(), Box<dyn std::error::Error>> {
-    println!("=== Session-Based Multi-Document Example ===\n");
-
-    // 1. Create the engine
-    println!("Step 1: Creating engine...");
-    let engine = EngineBuilder::new()
-        .with_workspace("./workspace_session_example")
-        .build()
-        .await
-        .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?;
-    println!("  ✓ Engine created\n");
-
-    // 2. Create a session for multi-document operations
-    println!("Step 2: Creating session...");
-    let session = engine.session().await;
-    println!("  ✓ Session ID: {}\n", session.id());
-
-    // 3. Index multiple documents into the session
-    println!("Step 3: Indexing documents...");
-
-    // Create sample documents
-    let temp_dir = tempfile::tempdir()?;
-
-    let doc1_content = r#"# Architecture Guide
-
-## Overview
-
-Vectorless uses a tree-based architecture for document navigation.
-
-## Components
-
-- **Indexer**: Parses documents and builds tree structure
-- **Retriever**: Navigates tree to find relevant content
-- **Workspace**: Manages document persistence
-"#;
-
-    let doc2_content = r#"# API Reference
-
-## Engine
-
-The main entry point for vectorless operations.
-
-### Methods
-
-- `index(path)`: Index a document
-- `query(doc_id, question)`: Query a document
-- `list_documents()`: List all documents
-
-## Session
-
-Multi-document operations with caching.
-
-### Methods
-
-- `index(path)`: Index into session
-- `query(doc_id, question)`: Query cached document
-- `query_all(question)`: Query across all documents
-"#;
-
-    let doc3_content = r#"# Configuration Guide
-
-## Workspace Settings
-
-The workspace directory stores indexed documents.
-
-```toml
-[storage]
-workspace_dir = "./workspace"
-```
-
-## Retrieval Settings
-
-Configure retrieval behavior:
-
-```toml
-[retrieval]
-top_k = 5
-max_tokens = 4000
-```
-
-## Content Aggregator
-
-Control content aggregation:
-
-```toml
-[retrieval.content]
-enabled = true
-token_budget = 4000
-```
-"#;
-
-    // Write sample documents
-    let doc1_path = temp_dir.path().join("architecture.md");
-    let doc2_path = temp_dir.path().join("api.md");
-    let doc3_path = temp_dir.path().join("config.md");
-
-    tokio::fs::write(&doc1_path, doc1_content).await?;
-    tokio::fs::write(&doc2_path, doc2_content).await?;
-    tokio::fs::write(&doc3_path, doc3_content).await?;
-
-    // Index into session
-    let doc1_id = session.index(IndexContext::from_path(&doc1_path)).await?;
-    println!("  ✓ Indexed: architecture.md -> {}", &doc1_id[..8]);
-
-    let doc2_id = session.index(IndexContext::from_path(&doc2_path)).await?;
-    println!("  ✓ Indexed: api.md -> {}", &doc2_id[..8]);
-
-    let doc3_id = session.index(IndexContext::from_path(&doc3_path)).await?;
-    println!("  ✓ Indexed: config.md -> {}", &doc3_id[..8]);
-    println!();
-
-    // 4. List documents in session
-    println!("Step 4: Session documents:");
-    for doc in session.list_documents() {
-        println!("  - {} ({})", doc.name, &doc.id[..8]);
-    }
-    println!();
-
-    // 5. Query individual documents (uses cache)
-    println!("Step 5: Query individual documents...");
-    let query = "What methods are available?";
-
-    println!("  Query: \"{}\"", query);
-    let start = std::time::Instant::now();
-    let result = session.query(&doc2_id, query).await?;
-    let elapsed = start.elapsed();
-    println!("    - Time: {:?}", elapsed);
-    println!("    - Score: {:.2}", result.score);
-    if !result.content.is_empty() {
-        let preview: String = result.content.chars().take(100).collect();
-        println!("    - Preview: {}...", preview);
-    }
-    println!();
-
-    // 6. Query same document again (should be faster due to cache)
-    println!("Step 6: Query cached document (should be faster)...");
-    let start = std::time::Instant::now();
-    let result = session.query(&doc2_id, "How to list documents?").await?;
-    let cached_elapsed = start.elapsed();
-    println!("    - Time: {:?}", cached_elapsed);
-    println!("    - Score: {:.2}", result.score);
-    println!();
-
-    // 7. Query across all documents
-    println!("Step 7: Cross-document query...");
-    let query = "How to configure the workspace?";
-    println!("  Query: \"{}\"", query);
-
-    let results = session.query_all(query).await?;
-    println!("  Found {} relevant documents:", results.len());
-
-    for (i, result) in results.iter().enumerate() {
-        println!(
-            "    {}. {} (score: {:.2})",
-            i + 1,
-            &result.doc_id[..8],
-            result.score
-        );
-    }
-    println!();
-
-    // 8. Show session statistics
-    println!("Step 8: Session statistics:");
-    let stats = session.stats();
-    println!("  - Documents: {}", session.list_documents().len());
-    println!("  - Queries: {}", stats.query_count.get());
-    println!("  - Cache hits: {}", stats.cache_hits.get());
-    println!("  - Cache misses: {}", stats.cache_misses.get());
-    println!("  - Cache hit rate: {:.1}%", stats.cache_hit_rate() * 100.0);
-    if let Some(avg_time) = stats.avg_query_time() {
-        println!("  - Avg query time: {:?}", avg_time);
-    }
-    println!("  - Session age: {:?}", session.age());
-    println!();
-
-    // 9. Cleanup
-    println!("Step 9: Cleanup...");
-    engine.remove(&doc1_id).await?;
-    engine.remove(&doc2_id).await?;
-    engine.remove(&doc3_id).await?;
-    println!("  ✓ Documents removed\n");
-
-    println!("=== Example Complete ===");
-    Ok(())
-}
diff --git a/examples/rust/storage_backend.rs b/examples/rust/storage_backend.rs
deleted file mode 100644
index a239013c..00000000
--- a/examples/rust/storage_backend.rs
+++ /dev/null
@@ -1,140 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Custom storage backend example.
-//!
-//! This example shows how to implement a custom StorageBackend.
-//! Useful for integrating with databases, cloud storage, etc.
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example storage_backend
-//! ```
-
-use std::collections::HashMap;
-use std::sync::{Arc, RwLock};
-
-use vectorless::Result;
-use vectorless::document::DocumentTree;
-use vectorless::storage::{DocumentMeta, PersistedDocument, StorageBackend, Workspace};
-
-/// A simple in-memory backend with logging.
-///
-/// This demonstrates how to implement StorageBackend trait.
-/// In production, you might implement S3, PostgreSQL, Redis, etc.
-#[derive(Debug)]
-struct LoggingMemoryBackend {
-    name: &'static str,
-    data: RwLock<HashMap<String, Vec<u8>>>,
-}
-
-impl LoggingMemoryBackend {
-    fn new(name: &'static str) -> Self {
-        Self {
-            name,
-            data: RwLock::new(HashMap::new()),
-        }
-    }
-}
-
-impl StorageBackend for LoggingMemoryBackend {
-    fn get(&self, key: &str) -> Result<Option<Vec<u8>>> {
-        let data = self.data.read().unwrap();
-        let result = data.get(key).cloned();
-        println!(
-            "   [{}] GET '{}' -> {}",
-            self.name,
-            key,
-            if result.is_some() {
-                "found"
-            } else {
-                "not found"
-            }
-        );
-        Ok(result)
-    }
-
-    fn put(&self, key: &str, value: &[u8]) -> Result<()> {
-        let mut data = self.data.write().unwrap();
-        data.insert(key.to_string(), value.to_vec());
-        println!("   [{}] PUT '{}' ({} bytes)", self.name, key, value.len());
-        Ok(())
-    }
-
-    fn delete(&self, key: &str) -> Result<bool> {
-        let mut data = self.data.write().unwrap();
-        let existed = data.remove(key).is_some();
-        println!("   [{}] DELETE '{}' -> {}", self.name, key, existed);
-        Ok(existed)
-    }
-
-    fn exists(&self, key: &str) -> Result<bool> {
-        let data = self.data.read().unwrap();
-        Ok(data.contains_key(key))
-    }
-
-    fn keys(&self) -> Result<Vec<String>> {
-        let data = self.data.read().unwrap();
-        Ok(data.keys().cloned().collect())
-    }
-
-    fn len(&self) -> Result<usize> {
-        let data = self.data.read().unwrap();
-        Ok(data.len())
-    }
-
-    fn clear(&self) -> Result<()> {
-        let mut data = self.data.write().unwrap();
-        data.clear();
-        println!("   [{}] CLEAR", self.name);
-        Ok(())
-    }
-
-    fn backend_name(&self) -> &'static str {
-        self.name
-    }
-}
-
-#[tokio::main]
-async fn main() -> vectorless::Result<()> {
-    println!("=== Custom Storage Backend Example ===\n");
-
-    // 1. Create custom backend
-    println!("1. Creating custom backend...");
-    let backend = Arc::new(LoggingMemoryBackend::new("MyCustomBackend"));
-    println!("   ✓ Backend: {}\n", backend.backend_name());
-
-    // 2. Create workspace with custom backend
-    println!("2. Creating workspace with custom backend...");
-    let workspace = Workspace::with_backend(backend).await?;
-    println!("   ✓ Workspace created\n");
-
-    // 3. Add a document (watch the logging)
-    println!("3. Adding document (observe backend calls):");
-    let meta = DocumentMeta::new("custom-doc", "Custom Backend Test", "md");
-    let tree = DocumentTree::new("Root", "Testing custom backend!");
-    let doc = PersistedDocument::new(meta, tree);
-    workspace.add(&doc).await?;
-    println!();
-
-    // 4. Load the document
-    println!("4. Loading document:");
-    let loaded = workspace.load_and_cache("custom-doc").await?.unwrap();
-    println!("   ✓ Loaded: {}\n", loaded.meta.name);
-
-    // 5. Show workspace stats
-    println!("5. Workspace stats:");
-    println!("   - Documents: {}", workspace.len().await);
-    println!("   - Cache size: {}", workspace.cache_len().await);
-    println!();
-
-    println!("✓ Custom backend example complete!");
-    println!("\nTip: Implement StorageBackend to integrate with:");
-    println!("  - S3 / GCS / Azure Blob");
-    println!("  - PostgreSQL / MySQL");
-    println!("  - Redis / Memcached");
-    println!("  - Any custom storage system");
-
-    Ok(())
-}
diff --git a/examples/rust/storage_compression.rs b/examples/rust/storage_compression.rs
deleted file mode 100644
index 80a6e10a..00000000
--- a/examples/rust/storage_compression.rs
+++ /dev/null
@@ -1,104 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Compression example.
-//!
-//! This example demonstrates compression support in storage:
-//! - GzipCodec for compressed storage
-//! - IdentityCodec for uncompressed storage
-//! - Codec trait for custom compression
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example storage_compression
-//! ```
-
-use vectorless::Result;
-use vectorless::storage::{Codec, GzipCodec, IdentityCodec};
-
-fn main() -> Result<()> {
-    println!("=== Compression Example ===\n");
-
-    // Test data
-    let original = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. \
-                     Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. \
-                     Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris.";
-    println!("Original data ({} bytes):", original.len());
-    println!("   {:?}...\n", String::from_utf8_lossy(&original[..50]));
-
-    // 1. Identity codec (no compression)
-    println!("1. IdentityCodec (no compression):");
-    let identity = IdentityCodec::new();
-
-    let identity_encoded = identity.encode(original)?;
-    let identity_decoded = identity.decode(&identity_encoded)?;
-
-    println!("   Encoded size: {} bytes", identity_encoded.len());
-    println!(
-        "   Compression ratio: {:.1}%",
-        (identity_encoded.len() as f64 / original.len() as f64) * 100.0
-    );
-    assert_eq!(original.to_vec(), identity_decoded);
-    println!("   ✓ Roundtrip verified\n");
-
-    // 2. Gzip codec with different levels
-    println!("2. GzipCodec with different compression levels:");
-
-    for level in [1, 6, 9] {
-        let gzip = GzipCodec::new(level);
-        let compressed = gzip.encode(original)?;
-
-        println!(
-            "   Level {}: {} bytes ({:.1}% of original)",
-            level,
-            compressed.len(),
-            (compressed.len() as f64 / original.len() as f64) * 100.0
-        );
-    }
-    println!();
-
-    // 3. Gzip roundtrip
-    println!("3. Gzip roundtrip verification:");
-    let gzip = GzipCodec::new(6);
-
-    let encoded = gzip.encode(original)?;
-    let decoded = gzip.decode(&encoded)?;
-
-    assert_eq!(original.to_vec(), decoded);
-    println!(
-        "   ✓ Encoded {} bytes -> {} bytes",
-        original.len(),
-        encoded.len()
-    );
-    println!("   ✓ Decoded back to {} bytes", decoded.len());
-    println!("   ✓ Data integrity verified\n");
-
-    // 4. Empty data handling
-    println!("4. Edge cases:");
-    let empty: &[u8] = &[];
-
-    let empty_encoded = gzip.encode(empty)?;
-    let empty_decoded = gzip.decode(&empty_encoded)?;
-    assert!(empty_decoded.is_empty());
-    println!("   ✓ Empty data handled correctly\n");
-
-    // 5. Comparison
-    println!("5. Summary:");
-    println!("   Original:    {} bytes", original.len());
-    println!("   Identity:    {} bytes (100.0%)", identity_encoded.len());
-    println!(
-        "   Gzip (lvl6): {} bytes ({:.1}%)",
-        encoded.len(),
-        (encoded.len() as f64 / original.len() as f64) * 100.0
-    );
-    println!();
-
-    println!("✓ Compression example complete!");
-    println!("\nUsage tips:");
-    println!("  - Use GzipCodec for large text documents");
-    println!("  - Use IdentityCodec for already-compressed data (PDF, images)");
-    println!("  - Level 6 is a good default (balance of speed vs ratio)");
-
-    Ok(())
-}
diff --git a/examples/rust/storage_migration.rs b/examples/rust/storage_migration.rs
deleted file mode 100644
index af4350bf..00000000
--- a/examples/rust/storage_migration.rs
+++ /dev/null
@@ -1,141 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Version migration example.
-//!
-//! This example demonstrates how to use the migration system
-//! for upgrading data formats between versions.
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example storage_migration
-//! ```
-
-use vectorless::storage::{Migration, MigrationContext, Migrator};
-use vectorless::{Error, Result};
-
-/// Example migration from v1 to v2.
-///
-/// Imagine v1 stored data as plain text,
-/// and v2 adds a header prefix.
-#[derive(Debug)]
-struct V1ToV2;
-
-impl Migration for V1ToV2 {
-    fn from_version(&self) -> u32 {
-        1
-    }
-
-    fn to_version(&self) -> u32 {
-        2
-    }
-
-    fn description(&self) -> &str {
-        "Add version header to data format"
-    }
-
-    fn migrate(&self, data: &[u8], _ctx: &MigrationContext) -> Result<Vec<u8>> {
-        // Add a simple header: "V2:" prefix
-        let mut result = b"V2:".to_vec();
-        result.extend_from_slice(data);
-        Ok(result)
-    }
-}
-
-/// Example migration from v2 to v3.
-///
-/// V3 adds compression (simulated with base64-like encoding).
-#[derive(Debug)]
-struct V2ToV3;
-
-impl Migration for V2ToV3 {
-    fn from_version(&self) -> u32 {
-        2
-    }
-
-    fn to_version(&self) -> u32 {
-        3
-    }
-
-    fn description(&self) -> &str {
-        "Add compression to data format"
-    }
-
-    fn migrate(&self, data: &[u8], _ctx: &MigrationContext) -> Result<Vec<u8>> {
-        // Simulate compression by adding prefix
-        let mut result = b"V3:COMPRESSED:".to_vec();
-        result.extend_from_slice(data);
-        Ok(result)
-    }
-}
-
-fn main() -> vectorless::Result<()> {
-    println!("=== Version Migration Example ===\n");
-
-    // 1. Create migrator
-    println!("1. Creating migrator and registering migrations...");
-    let mut migrator = Migrator::new();
-    migrator.register(Box::new(V1ToV2));
-    migrator.register(Box::new(V2ToV3));
-
-    println!("   Registered migrations:");
-    for (from, to, desc) in migrator.list_migrations() {
-        println!("   - v{} -> v{}: {}", from, to, desc);
-    }
-    println!();
-
-    // 2. Check migration paths
-    println!("2. Checking migration paths:");
-    println!("   Can migrate v1 -> v2: {}", migrator.can_migrate(1, 2));
-    println!("   Can migrate v1 -> v3: {}", migrator.can_migrate(1, 3));
-    println!("   Can migrate v2 -> v3: {}", migrator.can_migrate(2, 3));
-    println!("   Can migrate v1 -> v4: {}", migrator.can_migrate(1, 4));
-    println!();
-
-    // 3. Migrate from v1 to v3 (multi-step)
-    println!("3. Migrating data from v1 to v3 (via v2):");
-    let original_data = b"Hello, World!";
-    println!(
-        "   Original (v1): {:?}",
-        String::from_utf8_lossy(original_data)
-    );
-
-    let migrated = migrator.migrate(original_data, 1, 3)?;
-    println!("   Migrated (v3): {:?}", String::from_utf8_lossy(&migrated));
-    println!();
-
-    // 4. Direct migration
-    println!("4. Direct migration v2 -> v3:");
-    let v2_data = b"V2:Some data";
-    let v3_data = migrator.migrate(v2_data, 2, 3)?;
-    println!("   V2: {:?}", String::from_utf8_lossy(v2_data));
-    println!("   V3: {:?}", String::from_utf8_lossy(&v3_data));
-    println!();
-
-    // 5. No migration needed
-    println!("5. Same version (no migration):");
-    let data = b"Already v3";
-    let result = migrator.migrate(data, 3, 3)?;
-    assert_eq!(data.to_vec(), result);
-    println!("   ✓ Data unchanged when from == to");
-    println!();
-
-    // 6. Error case: no path
-    println!("6. Error handling (no migration path):");
-    match migrator.migrate(b"test", 1, 99) {
-        Err(Error::VersionMismatch(msg)) => {
-            println!("   Expected error: {}", msg);
-        }
-        _ => unreachable!(),
-    }
-    println!();
-
-    println!("✓ Migration example complete!");
-    println!("\nKey points:");
-    println!("  - Migrations are registered as v(N) -> v(N+1)");
-    println!("  - Migrator finds paths automatically (BFS)");
-    println!("  - Multi-step migrations are handled transparently");
-
-    Ok(())
-}
diff --git a/examples/rust/storage_workspace.rs b/examples/rust/storage_workspace.rs
deleted file mode 100644
index 794a7145..00000000
--- a/examples/rust/storage_workspace.rs
+++ /dev/null
@@ -1,118 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Basic workspace usage example.
-//!
-//! This example demonstrates the core storage API:
-//! - Creating an async workspace
-//! - Adding documents
-//! - Loading documents with LRU cache
-//! - Listing and removing documents
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example storage_workspace
-//! ```
-
-use vectorless::document::DocumentTree;
-use vectorless::storage::{DocumentMeta, PersistedDocument, Workspace};
-
-#[tokio::main]
-async fn main() -> vectorless::Result<()> {
-    println!("=== Storage Workspace Example ===\n");
-
-    // Create a temporary workspace
-    let workspace_path = "./example_workspace";
-
-    // 1. Create a workspace with custom cache size
-    println!("1. Creating workspace at '{}'...", workspace_path);
-    let workspace = Workspace::with_cache_size(workspace_path, 100)
-        .await
-        .map_err(|e| vectorless::Error::Workspace(e.to_string()))?;
-    println!("   ✓ Workspace created\n");
-
-    // 2. Create a document
-    println!("2. Creating a document...");
-    let meta = DocumentMeta::new("doc-001", "Getting Started Guide", "md")
-        .with_description("An introduction to the workspace API")
-        .with_source_path("./docs/getting-started.md");
-
-    let tree = DocumentTree::new("Introduction", "Welcome to Vectorless storage module!");
-
-    let doc = PersistedDocument::new(meta, tree);
-    println!("   ✓ Document created: {}\n", doc.meta.id);
-
-    // 3. Add document to workspace
-    println!("3. Adding document to workspace...");
-    workspace
-        .add(&doc)
-        .await
-        .map_err(|e| vectorless::Error::Workspace(e.to_string()))?;
-    println!("   ✓ Document saved\n");
-
-    // 4. List all documents
-    println!("4. Listing documents:");
-    for id in workspace.list_documents().await {
-        if let Some(meta) = workspace.get_meta(&id).await {
-            println!("   - {} ({})", meta.doc_name, meta.id);
-            if let Some(ref desc) = meta.doc_description {
-                println!("     Description: {}", desc);
-            }
-        }
-    }
-    println!();
-
-    // 5. Load document (uses LRU cache)
-    println!("5. Loading document...");
-    let loaded = workspace
-        .load_and_cache("doc-001")
-        .await
-        .map_err(|e| vectorless::Error::Workspace(e.to_string()))?
-        .expect("Document should exist");
-    println!("   ✓ Loaded: {}", loaded.meta.name);
-    let root = loaded.tree.root();
-    if let Some(node) = loaded.tree.get(root) {
-        println!("   Root node title: {}", node.title);
-    }
-    println!();
-
-    // 6. Cache statistics
-    println!("6. Cache statistics:");
-    let stats = workspace.cache_stats().await;
-    println!("   - Hits: {}", stats.hits);
-    println!("   - Misses: {}", stats.misses);
-    println!("   - Evictions: {}", stats.evictions);
-    println!(
-        "   - Utilization: {:.1}%",
-        workspace.cache_utilization().await * 100.0
-    );
-    println!();
-
-    // 7. Load again (should hit cache)
-    println!("7. Loading document again (should hit cache)...");
-    let _ = workspace
-        .load_and_cache("doc-001")
-        .await
-        .map_err(|e| vectorless::Error::Workspace(e.to_string()))?;
-    let stats = workspace.cache_stats().await;
-    println!("   ✓ Cache hits: {}", stats.hits);
-    println!();
-
-    // 8. Remove document
-    println!("8. Removing document...");
-    let removed = workspace
-        .remove("doc-001")
-        .await
-        .map_err(|e| vectorless::Error::Workspace(e.to_string()))?;
-    println!("   ✓ Removed: {}", removed);
-    println!("   Workspace is empty: {}", workspace.is_empty().await);
-    println!();
-
-    // Cleanup
-    println!("Cleaning up...");
-    std::fs::remove_dir_all(workspace_path).ok();
-    println!("   ✓ Done!");
-
-    Ok(())
-}
diff --git a/examples/rust/strategy_cross_document.rs b/examples/rust/strategy_cross_document.rs
deleted file mode 100644
index ac7432ee..00000000
--- a/examples/rust/strategy_cross_document.rs
+++ /dev/null
@@ -1,192 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Cross-Document Retrieval Strategy Example.
-//!
-//! This example demonstrates how to search across multiple documents
-//! simultaneously and merge results intelligently.
-//!
-//! # How it works
-//!
-//! 1. **Parallel Search**: Searches all documents in parallel
-//! 2. **Per-Document Scoring**: Each document returns its top matches
-//! 3. **Merge Strategy**: Combines results using configurable strategy
-//! 4. **Deduplication**: Removes duplicate content across documents
-//!
-//! # Merge Strategies
-//!
-//! - **TopK**: Take top-K results across all documents (default)
-//! - **BestPerDocument**: Take best result from each document
-//! - **WeightedByRelevance**: Weight results by document's best score
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example strategy_cross_document
-//! ```
-
-use vectorless::retrieval::CrossDocumentConfig;
-
-#[tokio::main]
-async fn main() -> vectorless::Result<()> {
-    println!("=== Cross-Document Retrieval Strategy Example ===\n");
-
-    // 1. Create multiple document trees
-    println!("--- Step 1: Document Collection ---\n");
-    let documents = create_document_collection();
-    println!("✓ Created {} sample documents\n", documents.len());
-
-    for (id, title) in &documents {
-        println!("  - {}: {}", id, title);
-    }
-    println!();
-
-    // 2. Demonstrate merge strategies
-    println!("--- Step 2: Merge Strategies ---\n");
-    demo_merge_strategies();
-
-    // 3. Show configuration options
-    println!("\n--- Step 3: Configuration Options ---\n");
-    demo_config_options();
-
-    // 4. Show parallel search benefits
-    println!("\n--- Step 4: Performance Benefits ---\n");
-    demo_performance();
-
-    // 5. Show usage patterns
-    println!("\n--- Step 5: Usage Patterns ---\n");
-    demo_usage_patterns();
-
-    println!("\n=== Done ===");
-    Ok(())
-}
-
-/// Demonstrate different merge strategies.
-fn demo_merge_strategies() {
-    println!("Query: \"configuration options\"\n");
-
-    // TopK merge
-    println!("MergeStrategy::TopK (default)");
-    println!("  → Takes top N results across all documents");
-    println!("  → Results ranked by score regardless of source");
-    println!("  → Best for: Finding the most relevant content\n");
-
-    // BestPerDocument merge
-    println!("MergeStrategy::BestPerDocument");
-    println!("  → Takes best result from each document");
-    println!("  → Ensures diversity in document sources");
-    println!("  → Best for: Overview across all documents\n");
-
-    // WeightedByRelevance merge
-    println!("MergeStrategy::WeightedByRelevance");
-    println!("  → Weights results by document's best score");
-    println!("  → Favors documents with strong matches");
-    println!("  → Best for: When some documents are more relevant\n");
-}
-
-/// Demonstrate configuration options.
-fn demo_config_options() {
-    // Default configuration
-    let default_config = CrossDocumentConfig::default();
-    println!("Default configuration:");
-    println!("  - max_documents: {}", default_config.max_documents);
-    println!("  - max_results_per_doc: {}", default_config.max_results_per_doc);
-    println!("  - max_total_results: {}", default_config.max_total_results);
-    println!("  - min_score: {:.2}", default_config.min_score);
-    println!("  - merge_strategy: {:?}", default_config.merge_strategy);
-    println!();
-
-    // Custom configuration for large collections
-    println!("Custom configuration builder:");
-    println!();
-    println!("```rust");
-    println!("let config = CrossDocumentConfig::new()");
-    println!("    .with_max_documents(50)");
-    println!("    .with_max_results_per_doc(5)");
-    println!("    .with_max_total_results(20)");
-    println!("    .with_min_score(0.3)");
-    println!("    .with_merge_strategy(MergeStrategy::WeightedByRelevance);");
-    println!("```");
-    println!();
-
-    // When to use which configuration
-    println!("Configuration guidelines:");
-    println!("  - Small collection (<10 docs): TopK, max_results=10");
-    println!("  - Medium collection (10-50 docs): WeightedByRelevance, max_results=15");
-    println!("  - Large collection (>50 docs): BestPerDocument, higher min_score");
-}
-
-/// Demonstrate performance benefits.
-fn demo_performance() {
-    println!("Parallel search performance:\n");
-
-    println!("| Documents | Sequential | Parallel | Speedup |");
-    println!("|-----------|------------|----------|---------|");
-    println!("| 5         | 500ms      | 100ms    | 5x      |");
-    println!("| 10        | 1000ms     | 100ms    | 10x     |");
-    println!("| 20        | 2000ms     | 100ms    | 20x     |");
-    println!("| 50        | 5000ms     | 150ms    | 33x     |");
-    println!();
-
-    println!("Benefits of parallel search:");
-    println!("  ✓ Near-constant latency regardless of document count");
-    println!("  ✓ Better resource utilization");
-    println!("  ✓ Scales well with CPU cores");
-    println!();
-
-    println!("When parallel search is most effective:");
-    println!("  - Multiple independent documents");
-    println!("  - Each document has similar search complexity");
-    println!("  - Network/disk I/O is not the bottleneck");
-}
-
-/// Demonstrate usage patterns.
-fn demo_usage_patterns() {
-    println!("Code example:");
-    println!();
-    println!("```rust");
-    println!("use vectorless::retrieval::{{");
-    println!("    CrossDocumentConfig, CrossDocumentStrategy, DocumentEntry,");
-    println!("    MergeStrategy,");
-    println!("}};");
-    println!("use vectorless::document::DocumentTree;");
-    println!();
-    println!("async fn search_across_documents(trees: Vec<(String, DocumentTree)>) {{");
-    println!("    // Configure cross-document search");
-    println!("    let config = CrossDocumentConfig::new()");
-    println!("        .with_max_documents(20)");
-    println!("        .with_max_results_per_doc(3)");
-    println!("        .with_max_total_results(10)");
-    println!("        .with_merge_strategy(MergeStrategy::WeightedByRelevance);");
-    println!();
-    println!("    // Create strategy");
-    println!("    let mut strategy = CrossDocumentStrategy::new(config);");
-    println!();
-    println!("    // Add documents");
-    println!("    for (id, tree) in trees {{");
-    println!("        let entry = DocumentEntry::new(id, tree);");
-    println!("        strategy.add_document(entry);");
-    println!("    }}");
-    println!();
-    println!("    // Search");
-    println!("    let results = strategy.retrieve(\"configuration options\").await?;");
-    println!("}}");
-    println!("```");
-    println!();
-
-    println!("Use cases:");
-    println!("  1. Documentation search across multiple guides");
-    println!("  2. Legal document search across contracts");
-    println!("  3. Research paper search across collections");
-    println!("  4. Code search across multiple repositories");
-}
-
-/// Create a sample document collection.
-fn create_document_collection() -> Vec<(&'static str, &'static str)> {
-    vec![
-        ("user-guide", "User Guide"),
-        ("api-reference", "API Reference"),
-        ("architecture", "Architecture Guide"),
-        ("config-reference", "Configuration Reference"),
-    ]
-}
diff --git a/examples/rust/strategy_hybrid.rs b/examples/rust/strategy_hybrid.rs
deleted file mode 100644
index eb2072ff..00000000
--- a/examples/rust/strategy_hybrid.rs
+++ /dev/null
@@ -1,233 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Hybrid Retrieval Strategy Example.
-//!
-//! This example demonstrates the Hybrid retrieval strategy that combines
-//! BM25 keyword matching with LLM-based semantic evaluation.
-//!
-//! # How it works
-//!
-//! 1. **BM25 Pre-filtering**: Quickly scores all nodes using keyword matching
-//! 2. **Candidate Selection**: Keeps top candidates based on BM25 scores
-//! 3. **LLM Refinement**: Applies LLM reasoning only to top candidates
-//! 4. **Final Scoring**: Combines BM25 and LLM scores with configurable weights
-//!
-//! # Benefits
-//!
-//! - Reduces LLM API calls (only evaluates top candidates)
-//! - Maintains accuracy through semantic understanding
-//! - Auto-accepts high BM25 scores (skips LLM entirely)
-//! - Auto-rejects low BM25 scores (skips LLM entirely)
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example strategy_hybrid
-//! ```
-
-use vectorless::document::DocumentTree;
-use vectorless::retrieval::HybridConfig;
-
-#[tokio::main]
-async fn main() -> vectorless::Result<()> {
-    println!("=== Hybrid Retrieval Strategy Example ===\n");
-
-    // 1. Create a sample document tree
-    let tree = create_sample_tree();
-    println!("✓ Created sample document tree ({} nodes)\n", tree.node_count());
-
-    // 2. Show default configuration
-    println!("--- Step 1: Default Configuration ---\n");
-    demo_default_config();
-
-    // 3. Show custom configuration
-    println!("\n--- Step 2: Custom Configuration ---\n");
-    demo_custom_config();
-
-    // 4. Show preset configurations
-    println!("\n--- Step 3: Preset Configurations ---\n");
-    demo_presets();
-
-    // 5. Show usage patterns
-    println!("\n--- Step 4: Usage Patterns ---\n");
-    demo_usage_patterns();
-
-    println!("\n=== Done ===");
-    Ok(())
-}
-
-/// Demonstrate default configuration.
-fn demo_default_config() {
-    let config = HybridConfig::default();
-
-    println!("Default HybridConfig:");
-    println!("  - pre_filter_ratio: {:.0}%", config.pre_filter_ratio * 100.0);
-    println!("  - min_candidates: {}", config.min_candidates);
-    println!("  - max_candidates: {}", config.max_candidates);
-    println!("  - auto_accept_threshold: {:.2}", config.auto_accept_threshold);
-    println!("  - auto_reject_threshold: {:.2}", config.auto_reject_threshold);
-    println!("  - bm25_weight: {:.2}", config.bm25_weight);
-    println!("  - llm_weight: {:.2}", config.llm_weight);
-    println!();
-
-    println!("How hybrid retrieval works:");
-    println!("  1. BM25 scores all nodes using keyword matching (fast)");
-    println!("  2. Keep top 30% of candidates (pre-filter)");
-    println!("  3. Auto-accept if BM25 score >= 0.85 (skip LLM entirely)");
-    println!("  4. Auto-reject if BM25 score <= 0.15 (skip LLM entirely)");
-    println!("  5. For remaining: LLM evaluates semantic relevance");
-    println!("  6. Final score = BM25*0.4 + LLM*0.6");
-}
-
-/// Demonstrate custom configuration.
-fn demo_custom_config() {
-    let config = HybridConfig::new()
-        .with_pre_filter_ratio(0.2) // More aggressive filtering
-        .with_candidate_limits(3, 10)
-        .with_thresholds(0.9, 0.2) // Higher bar for auto-accept
-        .with_weights(0.3, 0.7); // Favor LLM more
-
-    println!("Custom HybridConfig:");
-    println!("  - pre_filter_ratio: {:.0}%", config.pre_filter_ratio * 100.0);
-    println!("  - min_candidates: {}", config.min_candidates);
-    println!("  - max_candidates: {}", config.max_candidates);
-    println!("  - auto_accept_threshold: {:.2}", config.auto_accept_threshold);
-    println!("  - auto_reject_threshold: {:.2}", config.auto_reject_threshold);
-    println!("  - bm25_weight: {:.2}", config.bm25_weight);
-    println!("  - llm_weight: {:.2}", config.llm_weight);
-    println!();
-
-    println!("When to use this config:");
-    println!("  - High-volume queries where cost matters");
-    println!("  - Documents with clear keyword signals");
-    println!("  - When LLM quality is more important than speed");
-    println!();
-
-    println!("Example scenarios:");
-    println!("\n  Scenario 1: Exact keyword match");
-    println!("    Query: \"parse markdown files\"");
-    println!("    BM25 score: 0.92");
-    println!("    → Auto-accepted (>= 0.90), no LLM call needed");
-
-    println!("\n  Scenario 2: No keyword overlap");
-    println!("    Query: \"How do I get started?\"");
-    println!("    BM25 score: 0.10");
-    println!("    → Auto-rejected (<= 0.20), no LLM call needed");
-
-    println!("\n  Scenario 3: Moderate match");
-    println!("    Query: \"improve search quality\"");
-    println!("    BM25 score: 0.55");
-    println!("    → LLM refines: evaluates semantic relevance");
-}
-
-/// Demonstrate preset configurations.
-fn demo_presets() {
-    println!("Available presets:");
-    println!();
-
-    println!("1. HybridConfig::high_quality()");
-    let hq = HybridConfig::high_quality();
-    println!("   - Focus on accuracy over cost");
-    println!("   - pre_filter_ratio: {:.0}%", hq.pre_filter_ratio * 100.0);
-    println!("   - auto_accept_threshold: {:.2}", hq.auto_accept_threshold);
-    println!("   - bm25_weight: {:.2}, llm_weight: {:.2}", hq.bm25_weight, hq.llm_weight);
-    println!();
-
-    println!("2. HybridConfig::low_cost()");
-    let lc = HybridConfig::low_cost();
-    println!("   - Focus on cost efficiency");
-    println!("   - pre_filter_ratio: {:.0}%", lc.pre_filter_ratio * 100.0);
-    println!("   - auto_accept_threshold: {:.2}", lc.auto_accept_threshold);
-    println!("   - bm25_weight: {:.2}, llm_weight: {:.2}", lc.bm25_weight, lc.llm_weight);
-    println!();
-
-    println!("3. HybridConfig::default()");
-    let def = HybridConfig::default();
-    println!("   - Balanced approach");
-    println!("   - pre_filter_ratio: {:.0}%", def.pre_filter_ratio * 100.0);
-    println!("   - auto_accept_threshold: {:.2}", def.auto_accept_threshold);
-    println!("   - bm25_weight: {:.2}, llm_weight: {:.2}", def.bm25_weight, def.llm_weight);
-    println!();
-
-    println!("Cost comparison:");
-    println!("| Config       | LLM Calls | Quality | Use Case |");
-    println!("|--------------|-----------|---------|----------|");
-    println!("| low_cost     | 1-2       | Good    | High volume |");
-    println!("| default      | 2-5       | High    | General use |");
-    println!("| high_quality | 5-10      | Highest | Complex queries |");
-}
-
-/// Demonstrate usage patterns.
-fn demo_usage_patterns() {
-    println!("Code example:");
-    println!();
-    println!("```rust");
-    println!("use vectorless::retrieval::{{HybridConfig, HybridStrategy, LlmStrategy}};");
-    println!("use vectorless::llm::LlmClient;");
-    println!();
-    println!("async fn create_hybrid_retriever(client: LlmClient) {{");
-    println!("    // Create LLM strategy");
-    println!("    let llm_strategy = Box::new(LlmStrategy::new(client));");
-    println!();
-    println!("    // Option 1: Use preset");
-    println!("    let hybrid = HybridStrategy::new(llm_strategy)");
-    println!("        .with_high_quality();");
-    println!();
-    println!("    // Option 2: Custom config");
-    println!("    let config = HybridConfig::new()");
-    println!("        .with_pre_filter_ratio(0.25)");
-    println!("        .with_candidate_limits(3, 8)");
-    println!("        .with_thresholds(0.85, 0.15)");
-    println!("        .with_weights(0.35, 0.65);");
-    println!();
-    println!("    let hybrid = HybridStrategy::new(llm_strategy)");
-    println!("        .with_config(config);");
-    println!("}}");
-    println!("```");
-    println!();
-
-    println!("Benefits of hybrid strategy:");
-    println!("  ✓ 70-90% reduction in LLM API calls vs pure LLM");
-    println!("  ✓ 50-70% reduction in latency");
-    println!("  ✓ 90-95% of pure LLM quality");
-    println!("  ✓ Graceful degradation when LLM unavailable");
-}
-
-/// Create a sample document tree for demonstration.
-fn create_sample_tree() -> DocumentTree {
-    let mut tree = DocumentTree::new(
-        "Vectorless Documentation",
-        "A hierarchical document intelligence engine written in Rust.",
-    );
-
-    let intro = tree.add_child(
-        tree.root(),
-        "Introduction",
-        "Vectorless is a document intelligence engine that uses LLM-powered tree navigation.",
-    );
-
-    tree.add_child(
-        intro,
-        "Key Features",
-        "No embeddings, zero infrastructure, multi-format support.",
-    );
-
-    let arch = tree.add_child(
-        tree.root(),
-        "Architecture",
-        "Three main components: indexer, retriever, storage.",
-    );
-
-    let retrieve = tree.add_child(
-        arch,
-        "Retrieval Pipeline",
-        "Multi-stage retrieval with BM25 and LLM strategies.",
-    );
-
-    tree.add_child(retrieve, "Keyword Strategy", "Fast BM25-based matching.");
-    tree.add_child(retrieve, "Hybrid Strategy", "BM25 pre-filter + LLM refinement.");
-    tree.add_child(retrieve, "Cross-Document", "Multi-document search.");
-
-    tree
-}
diff --git a/examples/rust/strategy_page_range.rs b/examples/rust/strategy_page_range.rs
deleted file mode 100644
index f06635d3..00000000
--- a/examples/rust/strategy_page_range.rs
+++ /dev/null
@@ -1,259 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Page-Range Retrieval Strategy Example.
-//!
-//! This example demonstrates how to filter retrieval results by page range,
-//! which is particularly useful for PDF documents.
-//!
-//! # How it works
-//!
-//! 1. **Page Filtering**: Only considers nodes within specified page range
-//! 2. **Boundary Handling**: Configurable handling of nodes spanning boundaries
-//! 3. **Context Expansion**: Optionally expands range for surrounding context
-//! 4. **Overlap Detection**: Includes nodes that partially overlap with range
-//!
-//! # Use Cases
-//!
-//! - "What does chapter 3 say about X?" (pages 45-67)
-//! - "Find information in the introduction" (pages 1-10)
-//! - "Search the appendix" (pages 200-220)
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example strategy_page_range
-//! ```
-
-use vectorless::document::DocumentTree;
-use vectorless::retrieval::{PageRange, PageRangeConfig};
-
-#[tokio::main]
-async fn main() -> vectorless::Result<()> {
-    println!("=== Page-Range Retrieval Strategy Example ===\n");
-
-    // 1. Create a sample PDF-like document tree with page numbers
-    println!("--- Step 1: Document with Page Numbers ---\n");
-    let tree = create_pdf_like_tree();
-    println!("✓ Created document tree ({} nodes)\n", tree.node_count());
-
-    // 2. Demonstrate page range creation
-    println!("--- Step 2: Page Range Options ---\n");
-    demo_page_range_options();
-
-    // 3. Show configuration options
-    println!("\n--- Step 3: Configuration Options ---\n");
-    demo_config_options();
-
-    // 4. Show boundary handling
-    println!("\n--- Step 4: Boundary Handling ---\n");
-    demo_boundary_handling();
-
-    // 5. Show context expansion
-    println!("\n--- Step 5: Context Expansion ---\n");
-    demo_context_expansion();
-
-    // 6. Show usage patterns
-    println!("\n--- Step 6: Usage Patterns ---\n");
-    demo_usage_patterns();
-
-    println!("\n=== Done ===");
-    Ok(())
-}
-
-/// Demonstrate page range options.
-fn demo_page_range_options() {
-    println!("PageRange creation methods:\n");
-
-    // Specific range
-    let _range1 = PageRange::new(10, 20);
-    println!("  PageRange::new(10, 20)");
-    println!("    → Range: pages 10-20 (inclusive)");
-    println!("    → Use case: Search a specific chapter\n");
-
-    // Single page
-    let _range2 = PageRange::single(15);
-    println!("  PageRange::single(15)");
-    println!("    → Range: page 15 only");
-    println!("    → Use case: Search a specific page\n");
-
-    // From page to end
-    let _range3 = PageRange::from(30);
-    println!("  PageRange::from(30)");
-    println!("    → Range: page 30 to end of document");
-    println!("    → Use case: Search appendix or references\n");
-
-    // From beginning to page
-    let _range4 = PageRange::until(10);
-    println!("  PageRange::until(10)");
-    println!("    → Range: beginning to page 10");
-    println!("    → Use case: Search introduction or preface\n");
-
-    // Default (all pages)
-    let _range5 = PageRange::default();
-    println!("  PageRange::default()");
-    println!("    → Range: all pages");
-    println!("    → Use case: No page restriction\n");
-
-    println!("PageRange methods:");
-    println!("  - contains(page): Check if page is in range");
-    println!("  - overlaps(start, end): Check if range overlaps");
-    println!("  - len(): Get number of pages in range");
-    println!("  - is_empty(): Check if range is empty");
-}
-
-/// Demonstrate configuration options.
-fn demo_config_options() {
-    let default_config = PageRangeConfig::default();
-
-    println!("Default PageRangeConfig:");
-    println!("  - range: {:?}", default_config.range);
-    println!("  - include_boundary_nodes: {}", default_config.include_boundary_nodes);
-    println!("  - expand_context_pages: {}", default_config.expand_context_pages);
-    println!("  - min_overlap_ratio: {:.2}", default_config.min_overlap_ratio);
-    println!();
-
-    println!("Custom configuration:");
-    println!();
-    println!("```rust");
-    println!("let config = PageRangeConfig::new(PageRange::new(10, 30))");
-    println!("    .with_boundary_nodes(true)");
-    println!("    .with_context_expansion(2)");
-    println!("    .with_min_overlap_ratio(0.3);");
-    println!("```");
-    println!();
-
-    println!("Configuration guidelines:");
-    println!("  - Strict range: include_boundary_nodes=false, min_overlap_ratio=1.0");
-    println!("  - Include context: expand_context_pages=1-3");
-    println!("  - Lenient matching: min_overlap_ratio=0.1");
-}
-
-/// Demonstrate boundary handling.
-fn demo_boundary_handling() {
-    println!("Boundary handling example:\n");
-
-    println!("Scenario: Section spans pages 9-12, query range is 10-15\n");
-
-    println!("  include_boundary_nodes = false (strict)");
-    println!("    → Section (9-12) overlaps with range (10-15)");
-    println!("    → Included because overlap exists\n");
-
-    println!("  include_boundary_nodes = true (lenient)");
-    println!("    → Same result, but also includes partial overlaps");
-    println!("    → Useful for comprehensive results\n");
-
-    println!("Overlap calculation:");
-    println!("  Section pages: 9-12 (4 pages)");
-    println!("  Query range:   10-15 (6 pages)");
-    println!("  Overlap:       10-12 (3 pages)");
-    println!("  Overlap ratio: 3/4 = 75%\n");
-
-    println!("min_overlap_ratio threshold:");
-    println!("  - 0.1 (10%): Include almost any overlap");
-    println!("  - 0.5 (50%): Require significant overlap");
-    println!("  - 1.0 (100%): Section must be fully within range");
-}
-
-/// Demonstrate context expansion.
-fn demo_context_expansion() {
-    println!("Context expansion example:\n");
-
-    println!("Scenario: Query range is 10-15\n");
-
-    // Without expansion
-    println!("  Without expansion (expand_context_pages=0):");
-    println!("    → Only pages 10-15 searched");
-    println!("    → Might miss related content on pages 9 or 16\n");
-
-    // With expansion
-    println!("  With expansion (expand_context_pages=2):");
-    println!("    → Effective range: 8-17");
-    println!("    → Includes surrounding context for better results\n");
-
-    println!("When to use context expansion:");
-    println!("  ✓ When sections span multiple pages");
-    println!("  ✓ When relevant content might be just outside range");
-    println!("  ✓ For more comprehensive results\n");
-
-    println!("When NOT to use context expansion:");
-    println!("  ✗ When you need strict page boundaries");
-    println!("  ✗ For chapter-specific queries");
-    println!("  ✗ When precision is more important than recall");
-}
-
-/// Demonstrate usage patterns.
-fn demo_usage_patterns() {
-    println!("Code example:");
-    println!();
-    println!("```rust");
-    println!("use vectorless::retrieval::{{PageRange, PageRangeConfig, PageRangeStrategy}};");
-    println!("use vectorless::retrieval::RetrievalStrategy;");
-    println!();
-    println!("async fn search_in_chapter(tree: &DocumentTree) {{");
-    println!("    // Search only in chapter 3 (pages 45-67)");
-    println!("    let range = PageRange::new(45, 67);");
-    println!("    let config = PageRangeConfig::new(range)");
-    println!("        .with_boundary_nodes(true)");
-    println!("        .with_context_expansion(1);");
-    println!();
-    println!("    let strategy = PageRangeStrategy::new(config);");
-    println!("    ");
-    println!("    // Evaluate nodes within page range");
-    println!("    let results = strategy.evaluate_nodes(tree, node_ids, context).await;");
-    println!("}}");
-    println!("```");
-    println!();
-
-    println!("Common use cases:");
-    println!("  1. Chapter search: PageRange::new(45, 67)");
-    println!("  2. Introduction: PageRange::until(10)");
-    println!("  3. Appendix: PageRange::from(200)");
-    println!("  4. Single page: PageRange::single(42)");
-    println!();
-
-    println!("Best practices:");
-    println!("  - Know your document's page structure");
-    println!("  - Use context_expansion for flowing content");
-    println!("  - Use strict boundaries for discrete sections");
-    println!("  - Combine with other strategies (hybrid, keyword)");
-}
-
-/// Create a sample PDF-like document tree with page numbers.
-fn create_pdf_like_tree() -> DocumentTree {
-    let mut tree = DocumentTree::new(
-        "Sample PDF Document",
-        "A sample document simulating PDF structure with page numbers.",
-    );
-
-    // Introduction (pages 1-5)
-    let intro = tree.add_child(tree.root(), "Introduction", "Overview of the document.");
-    tree.set_page_boundaries(intro, 1, 5);
-    tree.add_child_with_pages(intro, "Background", "Background information.", 1, 2);
-    tree.add_child_with_pages(intro, "Motivation", "Why this document exists.", 3, 4);
-    tree.add_child_with_pages(intro, "Scope", "What is covered.", 5, 5);
-
-    // Main Content (pages 6-40)
-    let main = tree.add_child(tree.root(), "Main Content", "Primary content sections.");
-    tree.set_page_boundaries(main, 6, 40);
-
-    let chapter1 = tree.add_child_with_pages(main, "Chapter 1", "Getting started.", 6, 15);
-    tree.add_child_with_pages(chapter1, "Installation", "How to install.", 7, 9);
-    tree.add_child_with_pages(chapter1, "Configuration", "Configuration options.", 10, 12);
-
-    let chapter2 = tree.add_child_with_pages(main, "Chapter 2", "Core concepts.", 16, 28);
-    tree.add_child_with_pages(chapter2, "Architecture", "System architecture.", 16, 20);
-    tree.add_child_with_pages(chapter2, "Data Model", "How data is organized.", 21, 24);
-
-    let chapter3 = tree.add_child_with_pages(main, "Chapter 3", "Advanced usage.", 29, 40);
-    tree.add_child_with_pages(chapter3, "Custom Strategies", "Implementing custom strategies.", 29, 33);
-    tree.add_child_with_pages(chapter3, "Performance", "Optimizing performance.", 34, 37);
-
-    // Appendix (pages 41-50)
-    let appendix = tree.add_child(tree.root(), "Appendix", "Reference materials.");
-    tree.set_page_boundaries(appendix, 41, 50);
-    tree.add_child_with_pages(appendix, "API Reference", "Complete API documentation.", 41, 45);
-    tree.add_child_with_pages(appendix, "Config Reference", "All configuration options.", 46, 48);
-
-    tree
-}
diff --git a/examples/rust/streaming.rs b/examples/rust/streaming.rs
deleted file mode 100644
index d01de51d..00000000
--- a/examples/rust/streaming.rs
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Streaming retrieval example.
-//!
-//! This example demonstrates how to use streaming retrieval
-//! to get results incrementally as they are found.
-//!
-//! # What you'll learn:
-//! - How to use `retrieve_streaming()` for progressive results
-//! - How to handle RetrieveEvent types
-//! - How to display results as they arrive
-//!
-//! # RetrieveEvent types:
-//! - `Started`: Query began, shows planned strategy
-//! - `StageCompleted`: A pipeline stage finished
-//! - `Backtracking`: Search is backtracking for more data
-//! - `Completed`: Query finished with final results
-//! - `Error`: An error occurred
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example streaming
-//! ```
-
-use vectorless::document::DocumentTree;
-use vectorless::retrieval::{
-    PipelineRetriever, RetrieveEvent, RetrieveOptions, StrategyPreference,
-};
-
-#[tokio::main]
-async fn main() {
-    println!("=== Streaming Retrieval Example ===\n");
-
-    // 1. Create a sample document tree
-    let tree = create_sample_tree();
-    println!("Created sample document tree ({} nodes)\n", tree.node_count());
-
-    // 2. Create a pipeline retriever
-    let retriever = PipelineRetriever::new()
-        .with_max_backtracks(3)
-        .with_max_iterations(5);
-
-    // 3. Configure options (streaming is just a usage pattern, not a flag)
-    let options = RetrieveOptions {
-        top_k: 5,
-        beam_width: 3,
-        max_iterations: 5,
-        max_tokens: 4000,
-        strategy: StrategyPreference::Auto,
-        ..Default::default()
-    };
-
-    // 4. Execute streaming query
-    let query = "What is the architecture?";
-    println!("Query: \"{}\"\n", query);
-    println!("--- Streaming Events ---\n");
-
-    let (_handle, mut rx) = retriever.retrieve_streaming(&tree, query, &options);
-
-    // 5. Process events as they arrive
-    while let Some(event) = rx.recv().await {
-        match event {
-            RetrieveEvent::Started { query, strategy } => {
-                println!("[Started] query=\"{query}\", strategy={strategy}");
-            }
-            RetrieveEvent::StageCompleted { stage, elapsed_ms } => {
-                println!("[StageCompleted] {stage} ({elapsed_ms}ms)");
-            }
-            RetrieveEvent::NodeVisited { node_id, title, score } => {
-                println!("[NodeVisited] {title} (id={node_id}, score={score:.2})");
-            }
-            RetrieveEvent::ContentFound { title, preview, score, .. } => {
-                let short_preview = if preview.len() > 60 {
-                    format!("{}...", &preview[..60])
-                } else {
-                    preview
-                };
-                println!("[ContentFound] {title} (score={score:.2}): {short_preview}");
-            }
-            RetrieveEvent::Backtracking { from, to, reason } => {
-                println!("[Backtracking] {from} -> {to}: {reason}");
-            }
-            RetrieveEvent::SufficiencyCheck { level, tokens } => {
-                println!("[SufficiencyCheck] level={level:?}, tokens={tokens}");
-            }
-            RetrieveEvent::Completed { response } => {
-                println!("\n--- Final Results ---");
-                println!("Confidence:   {:.2}", response.confidence);
-                println!("Sufficient:   {}", response.is_sufficient);
-                println!("Strategy:     {}", response.strategy_used);
-                println!("Tokens used:  {}", response.tokens_used);
-                println!("Results:      {}", response.results.len());
-
-                if !response.results.is_empty() {
-                    println!("\nTop results:");
-                    for (i, result) in response.results.iter().take(3).enumerate() {
-                        println!("  {}. {} (score: {:.2})", i + 1, result.title, result.score);
-                    }
-                }
-                break;
-            }
-            RetrieveEvent::Error { message } => {
-                eprintln!("[Error] {message}");
-                break;
-            }
-        }
-    }
-
-    println!("\n=== Done ===");
-}
-
-/// Create a sample document tree for demonstration.
-fn create_sample_tree() -> DocumentTree {
-    let mut tree = DocumentTree::new(
-        "Vectorless Documentation",
-        "A hierarchical document intelligence engine written in Rust.",
-    );
-
-    let _intro = tree.add_child(
-        tree.root(),
-        "Introduction",
-        "Vectorless is a document intelligence engine written in Rust.",
-    );
-
-    let arch = tree.add_child(
-        tree.root(),
-        "Architecture",
-        "The system consists of three main components: indexer, retriever, and storage.",
-    );
-
-    let index_section = tree.add_child(
-        arch,
-        "Index Pipeline",
-        "The index pipeline processes documents into a tree structure with summaries.",
-    );
-    let retrieve_section = tree.add_child(
-        arch,
-        "Retrieval Pipeline",
-        "The retrieval pipeline finds relevant content using multi-stage processing.",
-    );
-
-    tree.add_child(
-        index_section,
-        "Parse Stage",
-        "Parses documents (Markdown, PDF, DOCX) into structured content.",
-    );
-    tree.add_child(
-        index_section,
-        "Build Stage",
-        "Builds the document tree with metadata like page numbers and indices.",
-    );
-
-    tree.add_child(
-        retrieve_section,
-        "Analyze Stage",
-        "Analyzes query complexity and extracts keywords for matching.",
-    );
-    tree.add_child(
-        retrieve_section,
-        "Plan Stage",
-        "Selects retrieval strategy (keyword/semantic/LLM) and search algorithm.",
-    );
-    tree.add_child(
-        retrieve_section,
-        "Search Stage",
-        "Executes tree traversal (greedy/beam/MCTS) to find relevant content.",
-    );
-
-    tree
-}
diff --git a/python/src/lib.rs b/python/src/lib.rs
index fc2cf730..53cde317 100644
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -11,8 +11,8 @@ use std::sync::Arc;
 use tokio::runtime::Runtime;
 
 // Use ::vectorless to avoid conflict with the #[pymodule] named vectorless
-use ::vectorless::client::{Engine, EngineBuilder, IndexContext, QueryResult, DocumentInfo};
-use ::vectorless::parser::DocumentFormat;
+use ::vectorless::client::{Engine, EngineBuilder, IndexContext, IndexItem, IndexResult, QueryContext, QueryResult, DocumentInfo};
+use ::vectorless::client::DocumentFormat;
 use ::vectorless::error::Error as RustError;
 
 // ============================================================
@@ -226,6 +226,71 @@ impl PyQueryResult {
     }
 }
 
+// ============================================================
+// IndexResult
+// ============================================================
+
+/// Result of a document indexing operation.
+#[pyclass(name = "IndexResult")]
+pub struct PyIndexResult {
+    inner: IndexResult,
+}
+
+#[pymethods]
+impl PyIndexResult {
+    /// The document ID (convenience for single-document indexing).
+    /// Python sees `None` when the underlying result reports no ID.
+    #[getter]
+    fn doc_id(&self) -> Option<String> {
+        self.inner.doc_id().map(|id| id.to_string())
+    }
+
+    /// All indexed items; each one is cloned into a fresh `IndexItem` wrapper.
+    #[getter]
+    fn items(&self) -> Vec<PyIndexItem> {
+        let wrap = |item: &IndexItem| PyIndexItem { inner: item.clone() };
+        self.inner.items.iter().map(wrap).collect()
+    }
+
+    /// Python-style repr: the ID is quoted like `IndexItem`'s repr, or `None`.
+    /// (`{:?}` on the `Option` would print Rust's `Some("...")` form.)
+    fn __repr__(&self) -> String {
+        let doc_id = match self.inner.doc_id() {
+            Some(id) => format!("'{}'", id),
+            None => "None".to_string(),
+        };
+        format!("IndexResult(doc_id={}, count={})", doc_id, self.inner.items.len())
+    }
+}
+
+/// A single indexed document item.
+#[pyclass(name = "IndexItem")]
+pub struct PyIndexItem {
+    inner: IndexItem,
+}
+
+#[pymethods]
+impl PyIndexItem {
+    #[getter]
+    fn doc_id(&self) -> &str {
+        &self.inner.doc_id
+    }
+
+    #[getter]
+    fn name(&self) -> &str {
+        &self.inner.name
+    }
+
+    #[getter]
+    fn format(&self) -> String {
+        format!("{:?}", self.inner.format).to_lowercase()
+    }
+
+    fn __repr__(&self) -> String {
+        format!("IndexItem(doc_id='{}', name='{}')", self.inner.doc_id, self.inner.name)
+    }
+}
+
 // ============================================================
 // DocumentInfo
 // ============================================================
@@ -290,10 +355,9 @@ impl PyDocumentInfo {
 ///
 /// Configuration priority (later overrides earlier):
 /// 1. Default configuration
-/// 2. Auto-detected config file (vectorless.toml, config.toml, .vectorless.toml)
-/// 3. Explicit config file (config_path parameter)
-/// 4. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)
-/// 5. Constructor parameters (api_key, model, endpoint) - highest priority
+/// 2. Explicit config file (config_path parameter)
+/// 3. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)
+/// 4. Constructor parameters (api_key, model, endpoint) - highest priority
 ///
 /// # Zero Configuration (Recommended)
 ///
@@ -311,10 +375,10 @@ impl PyDocumentInfo {
 /// engine = Engine(workspace="./data", model="gpt-4o-mini")
 /// ```
 ///
-/// # With Full Config File (Advanced)
+/// # With Config File (Advanced)
 ///
 /// ```python
-/// engine = Engine(config_path="./vectorless.toml")
+/// engine = Engine(workspace="./data", config_path="./vectorless.toml")
 /// ```
 #[pyclass(name = "Engine")]
 pub struct PyEngine {
@@ -335,10 +399,9 @@ impl PyEngine {
     ///
     /// Configuration priority (later overrides earlier):
     ///     1. Default configuration
-    ///     2. Auto-detected config file
-    ///     3. config_path parameter
-    ///     4. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)
-    ///     5. Constructor parameters (api_key, model, endpoint)
+    ///     2. config_path parameter (if provided)
+    ///     3. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)
+    ///     4. Constructor parameters (api_key, model, endpoint)
     ///
     /// Raises:
     ///     VectorlessError: If engine creation fails.
@@ -374,19 +437,16 @@ impl PyEngine {
                 builder = builder.with_workspace(ws);
             }
 
-            // Set model first (without overriding api_key)
             if let Some(m) = &model {
-                builder = builder.with_model(m, None);
+                builder = builder.with_model(m);
             }
 
-            // Set endpoint
             if let Some(e) = &endpoint {
                 builder = builder.with_endpoint(e);
             }
 
-            // Set API key last (this ensures it's not overwritten)
             if let Some(key) = resolved_api_key {
-                builder = builder.with_openai(key);
+                builder = builder.with_key(key);
             }
 
             builder.build().await
@@ -411,17 +471,19 @@ impl PyEngine {
     ///     ctx: IndexContext created from from_file, from_text, or from_bytes.
     ///
     /// Returns:
-    ///     Document ID string.
+    ///     IndexResult with doc_id and metadata.
     ///
     /// Raises:
     ///     VectorlessError: If indexing fails.
-    fn index(&self, ctx: &PyIndexContext) -> PyResult<String> {
+    fn index(&self, ctx: &PyIndexContext) -> PyResult<PyIndexResult> {
         let engine = Arc::clone(&self.inner);
         let index_ctx = ctx.inner.clone();
 
-        self.rt.block_on(async move {
+        let result = self.rt.block_on(async move {
             engine.index(index_ctx).await.map_err(to_py_err)
-        })
+        })?;
+
+        Ok(PyIndexResult { inner: result })
     }
 
     /// Query a document.
@@ -438,8 +500,10 @@ impl PyEngine {
     fn query(&self, doc_id: String, question: String) -> PyResult<PyQueryResult> {
         let engine = Arc::clone(&self.inner);
 
+        let ctx = QueryContext::new(&question).with_doc_id(&doc_id);
+
         let result = self.rt.block_on(async move {
-            engine.query(&doc_id, &question).await.map_err(to_py_err)
+            engine.query(ctx).await.map_err(to_py_err)
         })?;
 
         Ok(PyQueryResult { inner: result })
@@ -452,11 +516,11 @@ impl PyEngine {
     ///
     /// Raises:
     ///     VectorlessError: If listing fails.
-    fn list_docs(&self) -> PyResult<Vec<PyDocumentInfo>> {
+    fn list(&self) -> PyResult<Vec<PyDocumentInfo>> {
         let engine = Arc::clone(&self.inner);
 
         let docs = self.rt.block_on(async move {
-            engine.list_documents().await.map_err(to_py_err)
+            engine.list().await.map_err(to_py_err)
         })?;
 
         Ok(docs
@@ -509,13 +573,6 @@ impl PyEngine {
         self.rt.block_on(async move { engine.exists(&doc_id).await.map_err(to_py_err) })
     }
 
-    /// Get the number of indexed documents.
-    fn len(&self) -> PyResult<usize> {
-        let engine = Arc::clone(&self.inner);
-
-        self.rt.block_on(async move { engine.len().await.map_err(to_py_err) })
-    }
-
     fn __repr__(&self) -> String {
         "Engine(workspace=...)".to_string()
     }
@@ -525,10 +582,7 @@ impl PyEngine {
 // Module Definition
 // ============================================================
 
-/// Vectorless - Hierarchical document intelligence without vectors.
-///
-/// A document intelligence engine that uses tree-based understanding
-/// instead of vector databases.
+/// Vectorless - Reasoning-native document intelligence engine.
 ///
 /// Quick Start:
 ///
@@ -540,16 +594,18 @@ impl PyEngine {
 ///
 /// # Index a document
 /// ctx = IndexContext.from_file("./report.pdf")
-/// doc_id = engine.index(ctx)
+/// result = engine.index(ctx)
 ///
 /// # Query
-/// result = engine.query(doc_id, "What is the revenue?")
-/// print(result.content)
+/// answer = engine.query(result.doc_id, "What is the revenue?")
+/// print(answer.content)
 /// ```
 #[pymodule]
 fn vectorless(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<VectorlessError>()?;
     m.add_class::<PyIndexContext>()?;
+    m.add_class::<PyIndexResult>()?;
+    m.add_class::<PyIndexItem>()?;
     m.add_class::<PyQueryResult>()?;
     m.add_class::<PyDocumentInfo>()?;
     m.add_class::<PyEngine>()?;
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 11c5933a..fae28620 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -22,98 +22,22 @@ path = "../examples/rust/basic.rs"
 name = "advanced"
 path = "../examples/rust/advanced.rs"
 
-[[example]]
-name = "batch_processing"
-path = "../examples/rust/batch_processing.rs"
-
 [[example]]
 name = "cli_tool"
 path = "../examples/rust/cli_tool.rs"
 
-[[example]]
-name = "content_aggregation"
-path = "../examples/rust/content_aggregation.rs"
-
 [[example]]
 name = "custom_config"
 path = "../examples/rust/custom_config.rs"
 
-[[example]]
-name = "custom_pilot"
-path = "../examples/rust/custom_pilot.rs"
-
 [[example]]
 name = "events"
 path = "../examples/rust/events.rs"
 
-[[example]]
-name = "feedback_learning"
-path = "../examples/rust/feedback_learning.rs"
-
-[[example]]
-name = "html_parser"
-path = "../examples/rust/html_parser.rs"
-
-[[example]]
-name = "index"
-path = "../examples/rust/index.rs"
-
 [[example]]
 name = "markdownflow"
 path = "../examples/rust/markdownflow.rs"
 
-[[example]]
-name = "multi_format"
-path = "../examples/rust/multi_format.rs"
-
-[[example]]
-name = "reference_following"
-path = "../examples/rust/reference_following.rs"
-
-[[example]]
-name = "retrieve"
-path = "../examples/rust/retrieve.rs"
-
-[[example]]
-name = "session"
-path = "../examples/rust/session.rs"
-
-[[example]]
-name = "storage_backend"
-path = "../examples/rust/storage_backend.rs"
-
-[[example]]
-name = "storage_compression"
-path = "../examples/rust/storage_compression.rs"
-
-[[example]]
-name = "storage_migration"
-path = "../examples/rust/storage_migration.rs"
-
-[[example]]
-name = "storage_workspace"
-path = "../examples/rust/storage_workspace.rs"
-
-[[example]]
-name = "strategy_cross_document"
-path = "../examples/rust/strategy_cross_document.rs"
-
-[[example]]
-name = "strategy_hybrid"
-path = "../examples/rust/strategy_hybrid.rs"
-
-[[example]]
-name = "strategy_page_range"
-path = "../examples/rust/strategy_page_range.rs"
-
-[[example]]
-name = "streaming"
-path = "../examples/rust/streaming.rs"
-
-[[example]]
-name = "document_graph"
-path = "../examples/rust/document_graph.rs"
-
 [dependencies]
 # Async runtime
 tokio = { workspace = true }
diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs
index 70c3682b..8a71cc0d 100644
--- a/rust/src/client/builder.rs
+++ b/rust/src/client/builder.rs
@@ -83,9 +83,6 @@ use crate::storage::Workspace;
 use super::engine::Engine;
 use super::events::EventEmitter;
 
-/// Default configuration file names to search for.
-const CONFIG_FILE_NAMES: &[&str] = &["vectorless.toml", "config.toml", ".vectorless.toml"];
-
 /// Builder for creating a [`Engine`] client.
 ///
 /// The builder uses sensible defaults and automatically loads
@@ -283,10 +280,9 @@ impl EngineBuilder {
     // LLM Configuration
     // ============================================================
 
-    /// Configure for OpenAI API.
+    /// Set the LLM API key.
     ///
-    /// Sets the API key and optionally the model to "gpt-4o" if not already set.
-    /// Use [`with_model`](EngineBuilder::with_model) before this to specify a different model.
+    /// If not set, the key is read from the `OPENAI_API_KEY` environment variable.
     ///
     /// # Example
     ///
@@ -297,29 +293,21 @@ impl EngineBuilder {
     /// # async fn main() -> Result<(), vectorless::BuildError> {
     /// let engine = EngineBuilder::new()
     ///     .with_workspace("./data")
-    ///     .with_openai(std::env::var("OPENAI_API_KEY").unwrap())
+    ///     .with_key("sk-...")
     ///     .build()
     ///     .await?;
     /// # Ok(())
     /// # }
     /// ```
     #[must_use]
-    pub fn with_openai(self, api_key: impl Into<String>) -> Self {
-        let mut builder = self;
-        builder.api_key = Some(api_key.into());
-        // Only set default model if not already set
-        if builder.model.is_none() {
-            builder.model = Some("gpt-4o".to_string());
-        }
-        builder
+    pub fn with_key(mut self, key: impl Into<String>) -> Self {
+        self.api_key = Some(key.into());
+        self
     }
 
-    /// Set the LLM model and optional API key.
+    /// Set the LLM model name.
     ///
-    /// # Arguments
-    ///
-    /// * `model` - Model name (e.g., "gpt-4o", "gpt-4o-mini", "claude-3-5-sonnet")
-    /// * `api_key` - Optional API key (uses environment variable if not provided)
+    /// Default: "gpt-4o".
     ///
     /// # Example
     ///
@@ -330,18 +318,15 @@ impl EngineBuilder {
     /// # async fn main() -> Result<(), vectorless::BuildError> {
     /// let engine = EngineBuilder::new()
     ///     .with_workspace("./data")
-    ///     .with_model("gpt-4o-mini", Some("sk-...".to_string()))
+    ///     .with_model("gpt-4o-mini")
     ///     .build()
     ///     .await?;
     /// # Ok(())
     /// # }
     /// ```
     #[must_use]
-    pub fn with_model(mut self, model: impl Into<String>, api_key: Option<String>) -> Self {
+    pub fn with_model(mut self, model: impl Into<String>) -> Self {
         self.model = Some(model.into());
-        if api_key.is_some() {
-            self.api_key = api_key;
-        }
         self
     }
 
@@ -358,7 +343,7 @@ impl EngineBuilder {
     /// # async fn main() -> Result<(), vectorless::BuildError> {
     /// let engine = EngineBuilder::new()
     ///     .with_workspace("./data")
-    ///     .with_model("deepseek-chat", Some("sk-...".to_string()))
+    ///     .with_model("deepseek-chat")
     ///     .with_endpoint("https://api.deepseek.com/v1")
     ///     .build()
     ///     .await?;
@@ -465,37 +450,6 @@ impl EngineBuilder {
         }
     }
 
-    /// Search for config file in current directory and parent directories.
-    fn find_config_file() -> Option<PathBuf> {
-        let current_dir = std::env::current_dir().ok()?;
-
-        // Search in current directory first
-        for name in CONFIG_FILE_NAMES {
-            let path = current_dir.join(name);
-            if path.exists() {
-                return Some(path);
-            }
-        }
-
-        // Search in parent directories (up to 3 levels)
-        let mut dir = current_dir.as_path();
-        for _ in 0..3 {
-            if let Some(parent) = dir.parent() {
-                for name in CONFIG_FILE_NAMES {
-                    let path = parent.join(name);
-                    if path.exists() {
-                        return Some(path);
-                    }
-                }
-                dir = parent;
-            } else {
-                break;
-            }
-        }
-
-        None
-    }
-
     /// Build the Engine client.
     ///
     /// # Errors
@@ -514,7 +468,7 @@ impl EngineBuilder {
     /// # async fn main() -> Result<(), vectorless::BuildError> {
     /// let engine = EngineBuilder::new()
     ///     .with_workspace("./data")
-    ///     .with_openai(std::env::var("OPENAI_API_KEY").unwrap())
+    ///     .with_key(std::env::var("OPENAI_API_KEY").unwrap())
     ///     .build()
     ///     .await?;
     /// # Ok(())
@@ -533,10 +487,6 @@ impl EngineBuilder {
                 .file(&path)
                 .load()
                 .map_err(|e| BuildError::Config(e.to_string()))?
-        } else if let Some(config_path) = Self::find_config_file() {
-            ConfigLoader::new().file(&config_path).load().map_err(|e| {
-                BuildError::Config(format!("Failed to load {}: {}", config_path.display(), e))
-            })?
         } else {
             // No config file - use defaults with env var overrides
             let mut cfg = Config::default();
@@ -699,18 +649,27 @@ mod tests {
     }
 
     #[test]
-    fn test_builder_with_openai() {
-        let builder = EngineBuilder::new().with_openai("sk-test-key");
+    fn test_builder_with_key() {
+        let builder = EngineBuilder::new().with_key("sk-test-key");
 
-        assert_eq!(builder.model, Some("gpt-4o".to_string()));
         assert_eq!(builder.api_key, Some("sk-test-key".to_string()));
     }
 
     #[test]
     fn test_builder_with_model() {
-        let builder = EngineBuilder::new().with_model("gpt-4o-mini", Some("sk-test".to_string()));
+        let builder = EngineBuilder::new().with_model("gpt-4o-mini");
+
+        assert_eq!(builder.model, Some("gpt-4o-mini".to_string()));
+    }
+
+    #[test]
+    fn test_builder_with_key_and_model() {
+        let builder = EngineBuilder::new()
+            .with_model("gpt-4o-mini")
+            .with_key("sk-test");
 
         assert_eq!(builder.model, Some("gpt-4o-mini".to_string()));
+        assert_eq!(builder.api_key, Some("sk-test".to_string()));
     }
 
     #[test]
diff --git a/rust/src/client/context.rs b/rust/src/client/context.rs
deleted file mode 100644
index 3b8a7acd..00000000
--- a/rust/src/client/context.rs
+++ /dev/null
@@ -1,334 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Request context and configuration.
-//!
-//! This module provides request-scoped configuration and state management
-//! for client operations. It allows overriding global configuration on a
-//! per-request basis.
-//!
-//! # Example
-//!
-//! ```rust,ignore
-//! let ctx = ClientContext::new()
-//!     .with_top_k(10)
-//!     .with_token_budget(8000)
-//!     .with_timeout(Duration::from_secs(30));
-//!
-//! let result = client.query_with_context(&doc_id, "query", &ctx).await?;
-//! ```
-
-use std::collections::HashMap;
-use std::time::{Duration, Instant};
-
-use uuid::Uuid;
-
-use crate::retrieval::content::OutputFormatConfig;
-
-/// Request context for client operations.
-///
-/// Provides request-scoped configuration overrides and metadata.
-#[derive(Debug, Clone)]
-pub struct ClientContext {
-    /// Unique request ID for tracing.
-    pub request_id: Uuid,
-
-    /// Request-specific configuration overrides.
-    pub config: RequestContextConfig,
-
-    /// Request metadata (custom key-value pairs).
-    pub metadata: HashMap<String, String>,
-
-    /// Request deadline (for timeout).
-    pub deadline: Option<Instant>,
-
-    /// Priority (higher = more important).
-    pub priority: u8,
-}
-
-impl Default for ClientContext {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl ClientContext {
-    /// Create a new context with defaults.
-    pub fn new() -> Self {
-        Self {
-            request_id: Uuid::new_v4(),
-            config: RequestContextConfig::default(),
-            metadata: HashMap::new(),
-            deadline: None,
-            priority: 5, // Default priority
-        }
-    }
-
-    /// Create a context with a specific request ID.
-    pub fn with_id(id: Uuid) -> Self {
-        Self {
-            request_id: id,
-            ..Self::new()
-        }
-    }
-
-    /// Set the top_k override for retrieval.
-    pub fn with_top_k(mut self, top_k: usize) -> Self {
-        self.config.top_k = Some(top_k);
-        self
-    }
-
-    /// Set the token budget override.
-    pub fn with_token_budget(mut self, budget: usize) -> Self {
-        self.config.token_budget = Some(budget);
-        self
-    }
-
-    /// Set the content format override.
-    pub fn with_content_format(mut self, format: OutputFormatConfig) -> Self {
-        self.config.content_format = Some(format);
-        self
-    }
-
-    /// Set whether to include summaries.
-    pub fn with_summaries(mut self, include: bool) -> Self {
-        self.config.features.include_summaries = include;
-        self
-    }
-
-    /// Set whether to include content.
-    pub fn with_content(mut self, include: bool) -> Self {
-        self.config.features.include_content = include;
-        self
-    }
-
-    /// Set whether to enable caching.
-    pub fn with_cache(mut self, enable: bool) -> Self {
-        self.config.features.enable_cache = enable;
-        self
-    }
-
-    /// Set whether to enable sufficiency checking.
-    pub fn with_sufficiency_check(mut self, enable: bool) -> Self {
-        self.config.features.enable_sufficiency_check = enable;
-        self
-    }
-
-    /// Set a timeout duration.
-    pub fn with_timeout(mut self, duration: Duration) -> Self {
-        self.deadline = Some(Instant::now() + duration);
-        self
-    }
-
-    /// Set a deadline.
-    pub fn with_deadline(mut self, deadline: Instant) -> Self {
-        self.deadline = Some(deadline);
-        self
-    }
-
-    /// Set the priority (0-10, higher = more important).
-    pub fn with_priority(mut self, priority: u8) -> Self {
-        self.priority = priority.min(10);
-        self
-    }
-
-    /// Add metadata.
-    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
-        self.metadata.insert(key.into(), value.into());
-        self
-    }
-
-    /// Check if the request has timed out.
-    pub fn is_timed_out(&self) -> bool {
-        self.deadline.map(|d| Instant::now() > d).unwrap_or(false)
-    }
-
-    /// Get remaining time until deadline.
-    pub fn remaining_time(&self) -> Option<Duration> {
-        self.deadline
-            .map(|d| d.saturating_duration_since(Instant::now()))
-    }
-
-    /// Merge with another context (other takes precedence).
-    pub fn merge(&self, other: &ClientContext) -> ClientContext {
-        let mut merged = self.clone();
-        merged.request_id = other.request_id;
-
-        if other.config.top_k.is_some() {
-            merged.config.top_k = other.config.top_k;
-        }
-        if other.config.token_budget.is_some() {
-            merged.config.token_budget = other.config.token_budget;
-        }
-        if other.config.content_format.is_some() {
-            merged.config.content_format = other.config.content_format.clone();
-        }
-        if other.deadline.is_some() {
-            merged.deadline = other.deadline;
-        }
-        if other.priority != 5 {
-            merged.priority = other.priority;
-        }
-
-        // Merge metadata
-        for (k, v) in &other.metadata {
-            merged.metadata.insert(k.clone(), v.clone());
-        }
-
-        // Merge feature flags
-        merged.config.features = FeatureFlags {
-            include_summaries: other.config.features.include_summaries,
-            include_content: other.config.features.include_content,
-            enable_cache: other.config.features.enable_cache,
-            enable_sufficiency_check: other.config.features.enable_sufficiency_check,
-        };
-
-        merged
-    }
-}
-
-/// Request-specific configuration overrides.
-#[derive(Debug, Clone, Default)]
-pub struct RequestContextConfig {
-    /// Override top_k for retrieval.
-    pub top_k: Option<usize>,
-
-    /// Override token budget.
-    pub token_budget: Option<usize>,
-
-    /// Override content format.
-    pub content_format: Option<OutputFormatConfig>,
-
-    /// Feature flags.
-    pub features: FeatureFlags,
-}
-
-/// Feature flags for request.
-#[derive(Debug, Clone, Copy)]
-pub struct FeatureFlags {
-    /// Include summaries in results.
-    pub include_summaries: bool,
-
-    /// Include content in results.
-    pub include_content: bool,
-
-    /// Enable result caching.
-    pub enable_cache: bool,
-
-    /// Enable sufficiency checking.
-    pub enable_sufficiency_check: bool,
-}
-
-impl Default for FeatureFlags {
-    fn default() -> Self {
-        Self {
-            include_summaries: true,
-            include_content: true,
-            enable_cache: true,
-            enable_sufficiency_check: true,
-        }
-    }
-}
-
-impl FeatureFlags {
-    /// Create with all features enabled.
-    pub fn all() -> Self {
-        Self {
-            include_summaries: true,
-            include_content: true,
-            enable_cache: true,
-            enable_sufficiency_check: true,
-        }
-    }
-
-    /// Create with minimal features (fastest).
-    pub fn minimal() -> Self {
-        Self {
-            include_summaries: false,
-            include_content: true,
-            enable_cache: false,
-            enable_sufficiency_check: false,
-        }
-    }
-
-    /// Create for deep analysis.
-    pub fn deep() -> Self {
-        Self {
-            include_summaries: true,
-            include_content: true,
-            enable_cache: true,
-            enable_sufficiency_check: true,
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_context_creation() {
-        let ctx = ClientContext::new();
-        assert!(!ctx.request_id.is_nil());
-        assert!(ctx.config.top_k.is_none());
-        assert!(ctx.deadline.is_none());
-    }
-
-    #[test]
-    fn test_context_with_overrides() {
-        let ctx = ClientContext::new()
-            .with_top_k(10)
-            .with_token_budget(8000)
-            .with_cache(false);
-
-        assert_eq!(ctx.config.top_k, Some(10));
-        assert_eq!(ctx.config.token_budget, Some(8000));
-        assert!(!ctx.config.features.enable_cache);
-    }
-
-    #[test]
-    fn test_context_timeout() {
-        let ctx = ClientContext::new().with_timeout(Duration::from_millis(100));
-
-        assert!(!ctx.is_timed_out());
-        assert!(ctx.remaining_time().is_some());
-    }
-
-    #[test]
-    fn test_context_metadata() {
-        let ctx = ClientContext::new()
-            .with_metadata("user", "test")
-            .with_metadata("version", "1.0");
-
-        assert_eq!(ctx.metadata.get("user"), Some(&"test".to_string()));
-        assert_eq!(ctx.metadata.get("version"), Some(&"1.0".to_string()));
-    }
-
-    #[test]
-    fn test_context_merge() {
-        let ctx1 = ClientContext::new()
-            .with_top_k(5)
-            .with_metadata("key1", "value1");
-
-        let ctx2 = ClientContext::new()
-            .with_top_k(10)
-            .with_metadata("key2", "value2");
-
-        let merged = ctx1.merge(&ctx2);
-
-        assert_eq!(merged.config.top_k, Some(10));
-        assert_eq!(merged.metadata.get("key1"), Some(&"value1".to_string()));
-        assert_eq!(merged.metadata.get("key2"), Some(&"value2".to_string()));
-    }
-
-    #[test]
-    fn test_feature_flags() {
-        let all = FeatureFlags::all();
-        assert!(all.include_summaries);
-        assert!(all.include_content);
-
-        let minimal = FeatureFlags::minimal();
-        assert!(!minimal.include_summaries);
-        assert!(!minimal.enable_cache);
-    }
-}
diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs
index 4297d8e0..e075623a 100644
--- a/rust/src/client/engine.rs
+++ b/rust/src/client/engine.rs
@@ -3,45 +3,33 @@
 
 //! Main Engine client - the entry point for vectorless.
 //!
-//! This module provides the main client for document indexing and retrieval.
-//! The Engine is an orchestrator that delegates to specialized sub-clients.
+//! The Engine provides a unified API for document indexing and retrieval:
 //!
-//! # Architecture
-//!
-//! ```text
-//! Engine (Orchestrator)
-//! ├── IndexerClient   → Document indexing
-//! ├── RetrieverClient → Query and retrieval
-//! ├── WorkspaceClient → Document persistence
-//! └── EventEmitter    → Progress and events
-//! ```
+//! - [`index`](Engine::index) — Index documents from files, content, or bytes
+//! - [`query`](Engine::query) — Query documents using natural language
+//! - [`query_stream`](Engine::query_stream) — Query with streaming results
 //!
 //! # Example
 //!
 //! ```rust,no_run
-//! use vectorless::client::{Engine, EngineBuilder, IndexContext};
+//! use vectorless::client::{EngineBuilder, IndexContext, QueryContext};
 //!
 //! # #[tokio::main]
 //! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
-//! // Create a client
-//! let client = EngineBuilder::new()
-//!     .with_workspace("./my_workspace")
+//! let engine = EngineBuilder::new()
+//!     .with_workspace("./data")
 //!     .build()
 //!     .await?;
 //!
-//! // Index a document from file
-//! let doc_id = client.index(IndexContext::from_path("./document.md")).await?;
+//! // Index a document
+//! let result = engine.index(IndexContext::from_path("./document.md")).await?;
+//! let doc_id = result.doc_id().unwrap();
 //!
-//! // Index HTML content
-//! let html = "<html><body><h1>Title</h1><p>Content</p></body></html>";
-//! let doc_id2 = client.index(
-//!     IndexContext::from_content(html, vectorless::parser::DocumentFormat::Html)
-//!         .with_name("webpage")
+//! // Query
+//! let result = engine.query(
+//!     QueryContext::new("What is this?").with_doc_id(doc_id)
 //! ).await?;
 //!
-//! // Query the document
-//! let result = client.query(&doc_id, "What is this?").await?;
-//!
 //! println!("Found: {}", result.content);
 //! # Ok(())
 //! # }
@@ -54,17 +42,16 @@ use tracing::info;
 use crate::config::Config;
 use crate::error::Result;
 use crate::index::PipelineExecutor;
-use crate::retrieval::{PipelineRetriever, RetrieveOptions};
+use crate::retrieval::{PipelineRetriever, RetrieveEventReceiver};
 use crate::storage::Workspace;
 use crate::{DocumentTree, Error};
 
-use super::context::ClientContext;
 use super::events::EventEmitter;
 use super::index_context::IndexContext;
 use super::indexer::IndexerClient;
+use super::query_context::QueryContext;
 use super::retriever::RetrieverClient;
-use super::session::Session;
-use super::types::{DocumentInfo, QueryResult};
+use super::types::{DocumentInfo, IndexItem, IndexResult, QueryResult};
 use super::workspace::WorkspaceClient;
 
 /// The main Engine client.
@@ -98,29 +85,6 @@ pub struct Engine {
 }
 
 impl Engine {
-    /// Create a builder for custom configuration.
-    #[must_use]
-    pub fn builder() -> super::EngineBuilder {
-        super::EngineBuilder::new()
-    }
-
-    /// Create a new client with default configuration.
-    ///
-    /// Note: Prefer using [`Engine::builder()`] for more control.
-    async fn new() -> Result<Self> {
-        let config = Config::default();
-        let workspace = Workspace::new("./workspace")
-            .await
-            .map_err(|e| Error::Workspace(e.to_string()))?;
-        Self::with_components(
-            config,
-            workspace,
-            PipelineRetriever::new(),
-            PipelineExecutor::new(),
-        )
-        .await
-    }
-
     // ============================================================
     // Constructor (for Builder)
     // ============================================================
@@ -162,30 +126,15 @@ impl Engine {
 
     /// Index a document.
     ///
-    /// This is the main entry point for indexing documents. The [`IndexContext`]
-    /// parameter specifies the source (file path, content string, or bytes)
-    /// and indexing options.
+    /// Accepts an [`IndexContext`] that specifies the source (file path,
+    /// content string, or bytes) and indexing options.
     ///
-    /// # Arguments
-    ///
-    /// * `ctx` - The index context containing source and options
-    ///
-    /// # Returns
-    ///
-    /// A unique document ID string.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if:
-    /// - The file does not exist (for path sources)
-    /// - The file format is not supported
-    /// - The pipeline execution fails
+    /// Returns an [`IndexResult`] containing the indexed document metadata.
     ///
     /// # Example
     ///
     /// ```rust,no_run
-    /// use vectorless::client::{Engine, EngineBuilder, IndexContext, IndexMode};
-    /// use vectorless::parser::DocumentFormat;
+    /// use vectorless::client::{EngineBuilder, IndexContext};
     ///
     /// # #[tokio::main]
     /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
@@ -194,30 +143,16 @@ impl Engine {
     ///     .build()
     ///     .await?;
     ///
-    /// // From file
-    /// let id1 = engine.index(IndexContext::from_path("./doc.md")).await?;
-    ///
-    /// // From content
-    /// let html = "<html><body><h1>Title</h1></body></html>";
-    /// let id2 = engine.index(
-    ///     IndexContext::from_content(html, DocumentFormat::Html)
-    ///         .with_name("webpage")
-    /// ).await?;
-    ///
-    /// // From bytes with force mode
-    /// let pdf_bytes = std::fs::read("./doc.pdf")?;
-    /// let id3 = engine.index(
-    ///     IndexContext::from_bytes(pdf_bytes, DocumentFormat::Pdf)
-    ///         .with_mode(IndexMode::Force)
-    /// ).await?;
+    /// let result = engine.index(IndexContext::from_path("./doc.md")).await?;
+    /// println!("Indexed: {}", result.doc_id().unwrap());
     /// # Ok(())
     /// # }
     /// ```
-    pub async fn index(&self, ctx: IndexContext) -> Result<String> {
-        println!("Indexing...");
-        println!("ctx: {:?}", ctx);
-        
+    pub async fn index(&self, ctx: IndexContext) -> Result<IndexResult> {
         let doc = self.indexer.index(ctx).await?;
+
+        let item = IndexItem::new(doc.id.clone(), doc.name.clone(), doc.format.clone());
+
         let persisted = self.indexer.to_persisted(doc);
 
         // Save to workspace if configured
@@ -225,9 +160,8 @@ impl Engine {
             workspace.save(&persisted).await?;
         }
 
-        let doc_id = persisted.meta.id.clone();
-        info!("Indexed document: {}", doc_id);
-        Ok(doc_id)
+        info!("Indexed document: {}", item.doc_id);
+        Ok(IndexResult::new(vec![item]))
     }
 
     // ============================================================
@@ -236,101 +170,67 @@ impl Engine {
 
     /// Query a document.
     ///
-    /// Uses the adaptive retriever to find relevant content.
+    /// Accepts a [`QueryContext`] that specifies the query text, target document,
+    /// and optional retrieval parameters.
     ///
-    /// # Errors
+    /// # Example
     ///
-    /// Returns an error if:
-    /// - No workspace is configured
-    /// - The document is not found
-    /// - The retrieval fails
-    pub async fn query(&self, doc_id: &str, question: &str) -> Result<QueryResult> {
-        let tree = self.get_structure(doc_id).await?;
+    /// ```rust,no_run
+    /// use vectorless::client::{EngineBuilder, IndexContext, QueryContext};
+    ///
+    /// # #[tokio::main]
+    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    /// let engine = EngineBuilder::new()
+    ///     .with_workspace("./data")
+    ///     .build()
+    ///     .await?;
+    ///
+    /// let result = engine.query(
+    ///     QueryContext::new("What is the total revenue?")
+    ///         .with_doc_id("doc-123")
+    /// ).await?;
+    ///
+    /// println!("Answer: {}", result.content);
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub async fn query(&self, ctx: QueryContext) -> Result<QueryResult> {
+        let doc_id = ctx.doc_id.as_deref().ok_or_else(|| {
+            Error::Config("doc_id is required for query".to_string())
+        })?;
 
-        let options = RetrieveOptions::new()
-            .with_top_k(self.config.retrieval.top_k)
-            .with_include_content(true)
-            .with_include_summaries(true);
+        let tree = self.get_structure(doc_id).await?;
+        let options = ctx.to_retrieve_options(&self.config);
 
-        let mut result = self.retriever.query(&tree, question, &options).await?;
+        let mut result = self.retriever.query(&tree, &ctx.query, &options).await?;
         result.doc_id = doc_id.to_string();
 
         Ok(result)
     }
 
-    /// Query a document with context.
+    /// Query a document with streaming results.
     ///
-    /// Allows request-specific configuration overrides.
-    pub async fn query_with_context(
-        &self,
-        doc_id: &str,
-        question: &str,
-        ctx: &ClientContext,
-    ) -> Result<QueryResult> {
-        let tree = self.get_structure(doc_id).await?;
+    /// Returns a [`RetrieveEventReceiver`] that yields [`RetrieveEvent`](crate::retrieval::RetrieveEvent)s
+    /// as the retrieval pipeline progresses through each stage.
+    pub async fn query_stream(&self, ctx: QueryContext) -> Result<RetrieveEventReceiver> {
+        let doc_id = ctx.doc_id.as_deref().ok_or_else(|| {
+            Error::Config("doc_id is required for query".to_string())
+        })?;
 
-        let mut options = RetrieveOptions::new()
-            .with_top_k(self.config.retrieval.top_k)
-            .with_include_content(true)
-            .with_include_summaries(true);
-
-        // Apply context overrides
-        if let Some(top_k) = ctx.config.top_k {
-            options.top_k = top_k;
-        }
-        if let Some(token_budget) = ctx.config.token_budget {
-            options.max_tokens = token_budget;
-        }
-
-        let mut result = self
-            .retriever
-            .query_with_context(&tree, question, &options, ctx)
-            .await?;
-        result.doc_id = doc_id.to_string();
-
-        Ok(result)
-    }
+        let tree = self.get_structure(doc_id).await?;
+        let options = ctx.to_retrieve_options(&self.config);
 
-    // ============================================================
-    // Session Management
-    // ============================================================
+        let rx = self.retriever.query_stream(&tree, &ctx.query, &options).await?;
 
-    /// Create a session for multi-document operations.
-    ///
-    /// Sessions provide:
-    /// - Automatic caching of document trees
-    /// - Cross-document queries
-    /// - Session statistics
-    pub async fn session(&self) -> Session {
-        let workspace = match &self.workspace {
-            Some(ws) => ws.clone(),
-            None => {
-                // Create a temporary workspace if none configured
-                let async_ws = Workspace::new("./temp_workspace")
-                    .await
-                    .expect("Failed to create temp workspace");
-                WorkspaceClient::new(async_ws).await
-            }
-        };
-
-        Session::new(
-            self.indexer.clone(),
-            self.retriever.clone(),
-            workspace,
-            self.events.clone(),
-        )
+        Ok(rx)
     }
 
     // ============================================================
-    // Document Retrieval
+    // Document Management
     // ============================================================
 
     /// Get a list of all indexed documents.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if the workspace operation fails.
-    pub async fn list_documents(&self) -> Result<Vec<DocumentInfo>> {
+    pub async fn list(&self) -> Result<Vec<DocumentInfo>> {
         let workspace = self
             .workspace
             .as_ref()
@@ -339,124 +239,7 @@ impl Engine {
         workspace.list().await
     }
 
-    /// Get document structure (tree).
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if:
-    /// - No workspace is configured
-    /// - The document is not found
-    pub async fn get_structure(&self, doc_id: &str) -> Result<DocumentTree> {
-        let workspace = self
-            .workspace
-            .as_ref()
-            .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
-
-        let doc = workspace
-            .load(doc_id)
-            .await?
-            .ok_or_else(|| Error::DocumentNotFound(format!("Document not found: {}", doc_id)))?;
-
-        Ok(doc.tree)
-    }
-
-    /// Get page content for PDFs.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if:
-    /// - No workspace is configured
-    /// - The document is not found
-    /// - No page content is available
-    pub async fn get_page_content(&self, doc_id: &str, pages: &str) -> Result<String> {
-        let workspace = self
-            .workspace
-            .as_ref()
-            .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
-
-        let doc = workspace
-            .load(doc_id)
-            .await?
-            .ok_or_else(|| Error::DocumentNotFound(format!("Document not found: {}", doc_id)))?;
-
-        if doc.pages.is_empty() {
-            return Err(Error::Parse("No page content available".to_string()));
-        }
-
-        let page_nums = self.parse_page_range(pages)?;
-
-        let mut content = String::new();
-        for page_num in page_nums {
-            if let Some(page) = doc.pages.iter().find(|p| p.page == page_num) {
-                content.push_str(&format!("--- Page {} ---\n", page_num));
-                content.push_str(&page.content);
-                content.push_str("\n\n");
-            }
-        }
-
-        Ok(content)
-    }
-
-    /// Parse a page range string into page numbers.
-    fn parse_page_range(&self, pages: &str) -> Result<Vec<usize>> {
-        let mut result = Vec::new();
-
-        for part in pages.split(',') {
-            let part = part.trim();
-            if part.contains('-') {
-                let range: Vec<&str> = part.split('-').collect();
-                if range.len() == 2 {
-                    let start: usize = range[0]
-                        .parse()
-                        .map_err(|_| Error::Parse(format!("Invalid page number: {}", range[0])))?;
-                    let end: usize = range[1]
-                        .parse()
-                        .map_err(|_| Error::Parse(format!("Invalid page number: {}", range[1])))?;
-                    for p in start..=end {
-                        result.push(p);
-                    }
-                }
-            } else if !part.is_empty() {
-                let page: usize = part
-                    .parse()
-                    .map_err(|_| Error::Parse(format!("Invalid page number: {}", part)))?;
-                result.push(page);
-            }
-        }
-
-        Ok(result)
-    }
-
-    // ============================================================
-    // Persistence Operations
-    // ============================================================
-
-    /// Load a document from the workspace into cache.
-    ///
-    /// This preloads the document into the LRU cache for faster access.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if no workspace is configured.
-    pub async fn load(&self, doc_id: &str) -> Result<bool> {
-        let workspace = self
-            .workspace
-            .as_ref()
-            .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
-
-        if !workspace.exists(doc_id).await? {
-            return Ok(false);
-        }
-
-        let _ = workspace.load(doc_id).await?;
-        Ok(true)
-    }
-
     /// Remove a document from the workspace.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if no workspace is configured.
     pub async fn remove(&self, doc_id: &str) -> Result<bool> {
         let workspace = self
             .workspace
@@ -467,10 +250,6 @@ impl Engine {
     }
 
     /// Check if a document exists in the workspace.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if no workspace is configured.
     pub async fn exists(&self, doc_id: &str) -> Result<bool> {
         let workspace = self
             .workspace
@@ -480,43 +259,9 @@ impl Engine {
         workspace.exists(doc_id).await
     }
 
-    /// Get metadata for a document.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if no workspace is configured.
-    pub async fn get_metadata(&self, doc_id: &str) -> Result<Option<DocumentInfo>> {
-        let workspace = self
-            .workspace
-            .as_ref()
-            .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
-
-        workspace.get_document_info(doc_id).await
-    }
-
-    /// Remove multiple documents from the workspace.
-    ///
-    /// Returns the number of documents successfully removed.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if no workspace is configured.
-    pub async fn batch_remove(&self, doc_ids: &[&str]) -> Result<usize> {
-        let workspace = self
-            .workspace
-            .as_ref()
-            .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
-
-        workspace.batch_remove(doc_ids).await
-    }
-
     /// Remove all documents from the workspace.
     ///
     /// Returns the number of documents removed.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if no workspace is configured.
     pub async fn clear(&self) -> Result<usize> {
         let workspace = self
             .workspace
@@ -526,51 +271,23 @@ impl Engine {
         workspace.clear().await
     }
 
-    /// Get the number of indexed documents.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if the workspace operation fails.
-    pub async fn len(&self) -> Result<usize> {
+    // ============================================================
+    // Internal
+    // ============================================================
+
+    /// Get document structure (tree). Internal use only.
+    pub(crate) async fn get_structure(&self, doc_id: &str) -> Result<DocumentTree> {
         let workspace = self
             .workspace
             .as_ref()
             .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
 
-        Ok(workspace.len().await)
-    }
-
-    /// Check if there are no documents.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if the workspace operation fails.
-    pub async fn is_empty(&self) -> Result<bool> {
-        Ok(self.len().await? == 0)
-    }
-
-    // ============================================================
-    // Sub-Client Access
-    // ============================================================
-
-    /// Get the indexer client.
-    pub fn indexer(&self) -> &IndexerClient {
-        &self.indexer
-    }
-
-    /// Get the retriever client.
-    pub fn retriever(&self) -> &RetrieverClient {
-        &self.retriever
-    }
-
-    /// Get the workspace client.
-    pub fn workspace(&self) -> Option<&WorkspaceClient> {
-        self.workspace.as_ref()
-    }
+        let doc = workspace
+            .load(doc_id)
+            .await?
+            .ok_or_else(|| Error::DocumentNotFound(format!("Document not found: {}", doc_id)))?;
 
-    /// Get the configuration.
-    pub fn config(&self) -> &Config {
-        &self.config
+        Ok(doc.tree)
     }
 }
 
@@ -596,12 +313,11 @@ impl std::fmt::Debug for Engine {
 
 #[cfg(test)]
 mod tests {
-    use super::*;
+    use super::super::EngineBuilder;
 
     #[test]
     fn test_engine_builder() {
-        let builder = Engine::builder();
-        // Builder exists
+        let builder = EngineBuilder::new();
         let _ = builder;
     }
 }
diff --git a/rust/src/client/events.rs b/rust/src/client/events.rs
index 1195ce70..4681b45b 100644
--- a/rust/src/client/events.rs
+++ b/rust/src/client/events.rs
@@ -170,26 +170,26 @@ pub enum WorkspaceEvent {
 }
 
 /// Sync event handler trait.
-pub trait EventHandler: Send + Sync {
+pub(crate) trait EventHandler: Send + Sync {
     /// Handle an event.
     fn handle(&self, event: &Event);
 }
 
 /// Async event handler trait.
 #[async_trait]
-pub trait AsyncEventHandler: Send + Sync {
+pub(crate) trait AsyncEventHandler: Send + Sync {
     /// Handle an event asynchronously.
     async fn handle(&self, event: &Event);
 }
 
 /// Type alias for sync index handler.
-pub type IndexHandler = Box<dyn Fn(&IndexEvent) + Send + Sync>;
+pub(crate) type IndexHandler = Box<dyn Fn(&IndexEvent) + Send + Sync>;
 
 /// Type alias for sync query handler.
-pub type QueryHandler = Box<dyn Fn(&QueryEvent) + Send + Sync>;
+pub(crate) type QueryHandler = Box<dyn Fn(&QueryEvent) + Send + Sync>;
 
 /// Type alias for sync workspace handler.
-pub type WorkspaceHandler = Box<dyn Fn(&WorkspaceEvent) + Send + Sync>;
+pub(crate) type WorkspaceHandler = Box<dyn Fn(&WorkspaceEvent) + Send + Sync>;
 
 /// Event emitter for client operations.
 ///
@@ -243,7 +243,7 @@ impl EventEmitter {
     }
 
     /// Add an async event handler.
-    pub fn with_async_handler<H>(mut self, handler: Arc<H>) -> Self
+    pub(crate) fn with_async_handler<H>(mut self, handler: Arc<H>) -> Self
     where
         H: AsyncEventHandler + 'static,
     {
diff --git a/rust/src/client/index_context.rs b/rust/src/client/index_context.rs
index e5e1741b..6c038eac 100644
--- a/rust/src/client/index_context.rs
+++ b/rust/src/client/index_context.rs
@@ -68,7 +68,7 @@ use super::types::{IndexMode, IndexOptions};
 /// This enum represents the different ways a document can be provided
 /// to the indexing pipeline.
 #[derive(Debug, Clone)]
-pub enum IndexSource {
+pub(crate) enum IndexSource {
     /// Load document from a file path.
     ///
     /// The format is detected from the file extension.
@@ -149,7 +149,7 @@ impl IndexSource {
 /// # Examples
 ///
 /// ```rust,no_run
-/// use vectorless::client::{Engine, EngineBuilder, IndexContext, IndexMode};
+/// use vectorless::client::{EngineBuilder, IndexContext, IndexMode};
 /// use vectorless::parser::DocumentFormat;
 ///
 /// # #[tokio::main]
@@ -334,11 +334,6 @@ impl IndexContext {
         self
     }
 
-    /// Get the source of this context.
-    pub fn source(&self) -> &IndexSource {
-        &self.source
-    }
-
     /// Get the document name, if set.
     pub fn name(&self) -> Option<&str> {
         self.name.as_deref()
@@ -362,6 +357,18 @@ impl From<&std::path::Path> for IndexContext {
     }
 }
 
+impl From<&str> for IndexContext {
+    fn from(path: &str) -> Self {
+        Self::from_path(path)
+    }
+}
+
+impl From<String> for IndexContext {
+    fn from(path: String) -> Self {
+        Self::from_path(path)
+    }
+}
+
 impl std::fmt::Display for IndexSource {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
diff --git a/rust/src/client/indexer.rs b/rust/src/client/indexer.rs
index 0ba0a847..cb87a0c1 100644
--- a/rust/src/client/indexer.rs
+++ b/rust/src/client/indexer.rs
@@ -31,7 +31,6 @@ use crate::index::{IndexInput, IndexMode, PipelineExecutor, PipelineOptions, Sum
 use crate::parser::DocumentFormat;
 use crate::storage::{DocumentMeta, PersistedDocument};
 
-use super::context::ClientContext;
 use super::events::{EventEmitter, IndexEvent};
 use super::index_context::{IndexContext, IndexSource};
 use super::types::{IndexOptions, IndexedDocument};
@@ -39,7 +38,7 @@ use super::types::{IndexOptions, IndexedDocument};
 /// Document indexing client.
 ///
 /// Provides operations for parsing and indexing documents.
-pub struct IndexerClient {
+pub(crate) struct IndexerClient {
     /// Pipeline executor.
     executor: Arc<Mutex<PipelineExecutor>>,
 
@@ -424,7 +423,7 @@ impl Clone for IndexerClient {
 
 /// Document validation result.
 #[derive(Debug, Clone)]
-pub struct ValidationResult {
+pub(crate) struct ValidationResult {
     /// Whether the document is valid for indexing.
     pub valid: bool,
 
diff --git a/rust/src/client/mod.rs b/rust/src/client/mod.rs
index a9289ddf..f3ba5a49 100644
--- a/rust/src/client/mod.rs
+++ b/rust/src/client/mod.rs
@@ -7,31 +7,12 @@
 //! - [`Engine`] — The main client for indexing and querying documents
 //! - [`EngineBuilder`] — Builder pattern for client configuration
 //! - [`IndexContext`] — Unified input for document indexing
-//! - [`Session`] — Multi-document session management
-//!
-//! # Architecture
-//!
-//! The client module is organized into specialized sub-modules:
-//!
-//! ```text
-//! client/
-//! ├── mod.rs           → Re-exports and documentation
-//! ├── engine.rs        → Main orchestrator
-//! ├── builder.rs       → Builder pattern
-//! ├── index_context.rs → Index input types
-//! ├── types.rs         → Public API types
-//! ├── context.rs       → Request context and configuration
-//! ├── session.rs       → Session management
-//! ├── indexer.rs       → Document indexing operations
-//! ├── retriever.rs     → Query and retrieval operations
-//! ├── workspace.rs     → Workspace CRUD operations
-//! └── events.rs        → Event system and callbacks
-//! ```
+//! - [`QueryContext`] — Unified input for document queries
 //!
 //! # Quick Start
 //!
 //! ```rust,no_run
-//! use vectorless::client::{Engine, EngineBuilder, IndexContext};
+//! use vectorless::client::{EngineBuilder, IndexContext, QueryContext};
 //!
 //! # #[tokio::main]
 //! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
@@ -41,59 +22,30 @@
 //!     .build()
 //!     .await?;
 //!
-//! // Index a document from file
-//! let doc_id = client.index(IndexContext::from_path("./document.md")).await?;
-//!
-//! // Index HTML content directly
-//! let html = "<html><body><h1>Title</h1><p>Content</p></body></html>";
-//! let doc_id2 = client.index(
-//!     IndexContext::from_content(html, vectorless::parser::DocumentFormat::Html)
-//!         .with_name("webpage")
-//! ).await?;
+//! // Index a document
+//! let result = client.index(IndexContext::from_path("./document.md")).await?;
+//! let doc_id = result.doc_id().unwrap();
 //!
 //! // Query the document
-//! let result = client.query(&doc_id, "What is this?").await?;
+//! let result = client.query(
+//!     QueryContext::new("What is this?").with_doc_id(doc_id)
+//! ).await?;
 //! println!("{}", result.content);
 //!
 //! // List all documents
-//! for doc in client.list_documents().await? {
+//! for doc in client.list().await? {
 //!     println!("{}: {}", doc.id, doc.name);
 //! }
 //! # Ok(())
 //! # }
 //! ```
 //!
-//! # Session-Based Operations
-//!
-//! For multi-document operations, use sessions:
-//!
-//! ```rust,no_run
-//! # use vectorless::client::{Engine, EngineBuilder, IndexContext};
-//! # #[tokio::main]
-//! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
-//! let client = EngineBuilder::new()
-//!     .with_workspace("./workspace")
-//!     .build()
-//!     .await?;
-//!
-//! let session = client.session().await;
-//!
-//! // Index multiple documents
-//! let doc1 = session.index(IndexContext::from_path("./doc1.md")).await?;
-//! let doc2 = session.index(IndexContext::from_path("./doc2.md")).await?;
-//!
-//! // Query across all documents
-//! let results = session.query_all("What is the architecture?").await?;
-//! # Ok(())
-//! # }
-//! ```
-//!
 //! # Events and Progress
 //!
 //! Monitor operation progress with events:
 //!
 //! ```rust,no_run
-//! # use vectorless::client::{Engine, EngineBuilder, EventEmitter, IndexEvent};
+//! # use vectorless::client::{events::IndexEvent, EngineBuilder, EventEmitter};
 //! # #[tokio::main]
 //! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
 //! let events = EventEmitter::new()
@@ -109,23 +61,14 @@
 //! # Ok(())
 //! # }
 //! ```
-//!
-//! # Features
-//!
-//! - **Document Indexing** — Parse and index Markdown, PDF, and text files
-//! - **Tree-Based Structure** — Documents organized as hierarchical trees
-//! - **Workspace Persistence** — Save and load indexed documents
-//! - **Session Management** — Multi-document operations with caching
-//! - **Event System** — Progress callbacks and monitoring
 
 mod builder;
-mod context;
 mod engine;
 pub mod events;
 mod index_context;
 mod indexer;
+mod query_context;
 mod retriever;
-mod session;
 mod types;
 mod workspace;
 
@@ -137,53 +80,34 @@ pub use builder::{BuildError, EngineBuilder};
 pub use engine::Engine;
 
 // ============================================================
-// Index Context
+// Context Types
 // ============================================================
 
-pub use index_context::{IndexContext, IndexSource};
+pub use index_context::IndexContext;
+pub use query_context::QueryContext;
 
 // ============================================================
-// Sub-Clients
+// Events
 // ============================================================
 
-pub use indexer::IndexerClient;
-pub use retriever::RetrieverClient;
-pub use session::Session;
-pub use workspace::WorkspaceClient;
-
-// ============================================================
-// Context and Events
-// ============================================================
-
-pub use context::{ClientContext, FeatureFlags, RequestContextConfig};
-pub use events::{
-    AsyncEventHandler, Event, EventEmitter, EventHandler, IndexEvent, QueryEvent, WorkspaceEvent,
-};
+pub use events::EventEmitter;
 
 // ============================================================
-// Types
+// Result & Info Types
 // ============================================================
 
 pub use types::{
-    // Error types
     ClientError,
-    // Document info
     DocumentInfo,
-    // Index types
+    IndexItem,
     IndexMode,
     IndexOptions,
-    // Document types
-    IndexedDocument,
-    PageContent,
-    // Query types
+    IndexResult,
     QueryResult,
 };
 
 // ============================================================
-// Sub-Client Types
+// Parser Types (needed for IndexContext::from_content)
 // ============================================================
 
-pub use indexer::{IndexerConfig, ValidationResult};
-pub use retriever::{NodeContext, RetrieverClientConfig};
-pub use session::{EvictionPolicy, PreloadStrategy, SessionConfig, SessionStats};
-pub use workspace::{WorkspaceClientConfig, WorkspaceStats};
+pub use crate::parser::DocumentFormat;
diff --git a/rust/src/client/query_context.rs b/rust/src/client/query_context.rs
new file mode 100644
index 00000000..32b15378
--- /dev/null
+++ b/rust/src/client/query_context.rs
@@ -0,0 +1,171 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Query context for the Engine API.
+//!
+//! [`QueryContext`] encapsulates all parameters for a query operation,
+//! providing a builder pattern for configuration.
+//!
+//! # Example
+//!
+//! ```rust
+//! use vectorless::client::QueryContext;
+//!
+//! // Simple query
+//! let ctx = QueryContext::new("What is the total revenue?");
+//!
+//! // With document scope
+//! let ctx = QueryContext::new("What is the architecture?")
+//!     .with_doc_id("doc-abc123");
+//!
+//! // With options
+//! let ctx = QueryContext::new("Explain the algorithm")
+//!     .with_doc_id("doc-abc123")
+//!     .with_max_tokens(4000);
+//! ```
+
+use crate::config::Config;
+use crate::retrieval::{RetrieveOptions, StrategyPreference};
+
+/// Context for a query operation.
+///
+/// Encapsulates the query text, target document, and retrieval options.
+/// Use builder methods to configure.
+///
+/// # Convenience
+///
+/// Implements `From<String>` and `From<&str>` for quick construction:
+///
+/// ```rust
+/// use vectorless::client::QueryContext;
+///
+/// let ctx: QueryContext = "What is this?".into();
+/// ```
+#[derive(Debug, Clone)]
+pub struct QueryContext {
+    /// The query text.
+    pub(crate) query: String,
+    /// Target document ID. Required for now: `None` makes `Engine::query` fail with `Error::Config`.
+    pub(crate) doc_id: Option<String>,
+    /// Maximum tokens for the result content.
+    pub(crate) max_tokens: Option<usize>,
+    /// Retrieval strategy override.
+    pub(crate) strategy: Option<StrategyPreference>,
+    /// Whether to include the reasoning chain in the result.
+    pub(crate) include_reasoning: bool,
+    /// Maximum tree traversal depth.
+    pub(crate) depth_limit: Option<usize>,
+}
+
+impl QueryContext {
+    /// Create a new query context with the given query text.
+    pub fn new(query: impl Into<String>) -> Self {
+        Self {
+            query: query.into(),
+            doc_id: None,
+            max_tokens: None,
+            strategy: None,
+            include_reasoning: true,
+            depth_limit: None,
+        }
+    }
+
+    /// Set the target document ID.
+    pub fn with_doc_id(mut self, doc_id: impl Into<String>) -> Self {
+        self.doc_id = Some(doc_id.into());
+        self
+    }
+
+    /// Set the maximum tokens for the result content.
+    pub fn with_max_tokens(mut self, tokens: usize) -> Self {
+        self.max_tokens = Some(tokens);
+        self
+    }
+
+    /// Set the retrieval strategy.
+    pub fn with_strategy(mut self, strategy: StrategyPreference) -> Self {
+        self.strategy = Some(strategy);
+        self
+    }
+
+    /// Set whether to include the reasoning chain.
+    pub fn with_include_reasoning(mut self, include: bool) -> Self {
+        self.include_reasoning = include;
+        self
+    }
+
+    /// Set the maximum tree traversal depth.
+    pub fn with_depth_limit(mut self, depth: usize) -> Self {
+        self.depth_limit = Some(depth);
+        self
+    }
+
+    /// Build `RetrieveOptions` from engine config plus `max_tokens`/`strategy` (other fields are not forwarded).
+    pub(crate) fn to_retrieve_options(&self, config: &Config) -> RetrieveOptions {
+        let mut opts = RetrieveOptions::new()
+            .with_top_k(config.retrieval.top_k)
+            .with_include_content(true)
+            .with_include_summaries(true);
+
+        if let Some(max_tokens) = self.max_tokens {
+            opts = opts.with_max_tokens(max_tokens);
+        }
+
+        if let Some(strategy) = &self.strategy {
+            opts = opts.with_strategy(strategy.clone());
+        }
+
+        opts
+    }
+}
+
+impl From<String> for QueryContext {
+    fn from(query: String) -> Self {
+        Self::new(query)
+    }
+}
+
+impl From<&str> for QueryContext {
+    fn from(query: &str) -> Self {
+        Self::new(query)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_query_context_new() {
+        let ctx = QueryContext::new("What is this?");
+        assert_eq!(ctx.query, "What is this?");
+        assert!(ctx.doc_id.is_none());
+        assert!(ctx.include_reasoning);
+    }
+
+    #[test]
+    fn test_query_context_from_string() {
+        let ctx: QueryContext = "Hello".to_string().into();
+        assert_eq!(ctx.query, "Hello");
+    }
+
+    #[test]
+    fn test_query_context_from_str() {
+        let ctx: QueryContext = "Hello".into();
+        assert_eq!(ctx.query, "Hello");
+    }
+
+    #[test]
+    fn test_query_context_builder() {
+        let ctx = QueryContext::new("test")
+            .with_doc_id("doc-1")
+            .with_max_tokens(4000)
+            .with_include_reasoning(false)
+            .with_depth_limit(5);
+
+        assert_eq!(ctx.doc_id, Some("doc-1".to_string()));
+        assert_eq!(ctx.max_tokens, Some(4000));
+        assert!(!ctx.include_reasoning);
+        assert_eq!(ctx.depth_limit, Some(5));
+    }
+}
diff --git a/rust/src/client/retriever.rs b/rust/src/client/retriever.rs
index c1760e6a..c5bd1aa1 100644
--- a/rust/src/client/retriever.rs
+++ b/rust/src/client/retriever.rs
@@ -30,15 +30,13 @@ use crate::retrieval::{
     QueryComplexity, RetrievalResult, RetrieveOptions, RetrieveResponse, Retriever,
     SufficiencyLevel,
 };
-
-use super::context::ClientContext;
 use super::events::{EventEmitter, QueryEvent};
 use super::types::QueryResult;
 
 /// Document retrieval client.
 ///
 /// Provides operations for querying document content.
-pub struct RetrieverClient {
+pub(crate) struct RetrieverClient {
     /// Pipeline retriever.
     retriever: Arc<crate::retrieval::PipelineRetriever>,
 
@@ -54,7 +52,7 @@ pub struct RetrieverClient {
 
 /// Retriever configuration.
 #[derive(Debug, Clone)]
-pub struct RetrieverClientConfig {
+pub(crate) struct RetrieverClientConfig {
     /// Default top_k for retrieval.
     pub default_top_k: usize,
 
@@ -123,56 +121,23 @@ impl RetrieverClient {
     ///
     /// # Errors
     ///
-    /// Returns an error if:
-    /// - The retrieval pipeline fails
+    /// Returns an error if the retrieval pipeline fails.
     pub async fn query(
         &self,
         tree: &DocumentTree,
         question: &str,
         options: &RetrieveOptions,
     ) -> Result<QueryResult> {
-        self.query_with_context(tree, question, options, &ClientContext::new())
-            .await
-    }
-
-    /// Query with request context.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if:
-    /// - The retrieval pipeline fails
-    /// - The request has timed out
-    pub async fn query_with_context(
-        &self,
-        tree: &DocumentTree,
-        question: &str,
-        options: &RetrieveOptions,
-        ctx: &ClientContext,
-    ) -> Result<QueryResult> {
-        // Check timeout
-        if ctx.is_timed_out() {
-            return Err(Error::Other("Request timed out".to_string()));
-        }
-
         self.events.emit_query(QueryEvent::Started {
             query: question.to_string(),
         });
 
         info!("Querying: {:?}", question);
 
-        // Apply context overrides
-        let mut options = options.clone();
-        if let Some(top_k) = ctx.config.top_k {
-            options.top_k = top_k;
-        }
-        if let Some(token_budget) = ctx.config.token_budget {
-            options.max_tokens = token_budget;
-        }
-
         // Execute retrieval
         let response = self
             .retriever
-            .retrieve(tree, question, &options)
+            .retrieve(tree, question, options)
             .await
             .map_err(|e| Error::Retrieval(e.to_string()))?;
 
@@ -465,7 +430,7 @@ impl Clone for RetrieverClient {
 
 /// Node context information.
 #[derive(Debug, Clone)]
-pub struct NodeContext {
+pub(crate) struct NodeContext {
     /// The target node.
     pub target: Option<RetrievalResult>,
 
diff --git a/rust/src/client/session.rs b/rust/src/client/session.rs
deleted file mode 100644
index b698bd35..00000000
--- a/rust/src/client/session.rs
+++ /dev/null
@@ -1,519 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Session management for multi-document operations.
-//!
-//! This module provides session-based document management with
-//! automatic caching and cross-document querying.
-//!
-//! # Example
-//!
-//! ```rust,ignore
-//! use vectorless::client::IndexContext;
-//!
-//! let session = client.session();
-//!
-//! // Index multiple documents
-//! let doc1 = session.index(IndexContext::from_path("./doc1.md")).await?;
-//! let doc2 = session.index(IndexContext::from_path("./doc2.md")).await?;
-//!
-//! // Query across all documents
-//! let results = session.query_all("What is X?").await?;
-//!
-//! // Query single document (uses cached tree)
-//! let result = session.query(&doc1, "Summary?").await?;
-//! ```
-
-use std::cell::Cell;
-use std::collections::HashMap;
-use std::sync::Arc;
-use std::time::{Duration, Instant};
-
-use tracing::info;
-use uuid::Uuid;
-
-use crate::error::Result;
-use crate::retrieval::RetrieveOptions;
-use crate::storage::PersistedDocument;
-use crate::{DocumentTree, Error};
-
-use super::context::ClientContext;
-use super::events::EventEmitter;
-use super::indexer::IndexerClient;
-use super::retriever::RetrieverClient;
-use super::types::{DocumentInfo, QueryResult};
-use super::workspace::WorkspaceClient;
-
-/// Session for managing multiple documents.
-///
-/// Provides automatic caching of document trees and cross-document operations.
-pub struct Session {
-    /// Session ID.
-    pub id: Uuid,
-
-    /// Session configuration.
-    config: SessionConfig,
-
-    /// Document contexts (cached).
-    documents: HashMap<String, DocumentContext>,
-
-    /// Indexer client.
-    indexer: IndexerClient,
-
-    /// Retriever client.
-    retriever: RetrieverClient,
-
-    /// Workspace client.
-    workspace: WorkspaceClient,
-
-    /// Event emitter.
-    events: EventEmitter,
-
-    /// Session statistics.
-    stats: SessionStats,
-
-    /// Created at timestamp.
-    created_at: Instant,
-}
-
-/// Document context within a session.
-#[derive(Debug, Clone)]
-struct DocumentContext {
-    /// Document ID.
-    doc_id: String,
-
-    /// Cached document tree.
-    tree: Option<Arc<DocumentTree>>,
-
-    /// Document metadata.
-    meta: DocumentInfo,
-
-    /// Access count.
-    access_count: usize,
-
-    /// Last access time.
-    last_accessed: Instant,
-}
-
-/// Session configuration.
-#[derive(Debug, Clone)]
-pub struct SessionConfig {
-    /// Maximum documents to cache in memory.
-    pub max_cached_documents: usize,
-
-    /// Cache eviction policy.
-    pub eviction_policy: EvictionPolicy,
-
-    /// Preload strategy when indexing.
-    pub preload_strategy: PreloadStrategy,
-}
-
-impl Default for SessionConfig {
-    fn default() -> Self {
-        Self {
-            max_cached_documents: 100,
-            eviction_policy: EvictionPolicy::Lru,
-            preload_strategy: PreloadStrategy::Lazy,
-        }
-    }
-}
-
-/// Cache eviction policy.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum EvictionPolicy {
-    /// Least recently used.
-    Lru,
-    /// First in, first out.
-    Fifo,
-    /// No eviction (until session closes).
-    None,
-}
-
-/// Document preload strategy.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum PreloadStrategy {
-    /// Load trees on demand.
-    Lazy,
-    /// Load trees immediately when indexing.
-    Eager,
-}
-
-/// Session statistics.
-#[derive(Debug, Default)]
-pub struct SessionStats {
-    /// Total documents in session.
-    pub document_count: Cell<usize>,
-
-    /// Total queries made.
-    pub query_count: Cell<usize>,
-
-    /// Cache hits.
-    pub cache_hits: Cell<usize>,
-
-    /// Cache misses.
-    pub cache_misses: Cell<usize>,
-
-    /// Total query time (in microseconds).
-    total_query_time_us: Cell<u64>,
-}
-
-impl SessionStats {
-    /// Get the cache hit rate.
-    pub fn cache_hit_rate(&self) -> f32 {
-        let total = self.cache_hits.get() + self.cache_misses.get();
-        if total == 0 {
-            0.0
-        } else {
-            self.cache_hits.get() as f32 / total as f32
-        }
-    }
-
-    /// Get the total query time.
-    pub fn total_query_time(&self) -> Duration {
-        Duration::from_micros(self.total_query_time_us.get())
-    }
-
-    /// Get the average query time.
-    pub fn avg_query_time(&self) -> Option<Duration> {
-        let count = self.query_count.get();
-        if count == 0 {
-            None
-        } else {
-            Some(self.total_query_time() / count as u32)
-        }
-    }
-
-    /// Increment query count.
-    fn increment_query_count(&self) {
-        self.query_count.set(self.query_count.get() + 1);
-    }
-
-    /// Add query time.
-    fn add_query_time(&self, duration: Duration) {
-        self.total_query_time_us
-            .set(self.total_query_time_us.get() + duration.as_micros() as u64);
-    }
-
-    /// Increment cache hits.
-    fn increment_cache_hits(&self) {
-        self.cache_hits.set(self.cache_hits.get() + 1);
-    }
-
-    /// Increment cache misses.
-    fn increment_cache_misses(&self) {
-        self.cache_misses.set(self.cache_misses.get() + 1);
-    }
-}
-
-impl Clone for SessionStats {
-    fn clone(&self) -> Self {
-        Self {
-            document_count: Cell::new(self.document_count.get()),
-            query_count: Cell::new(self.query_count.get()),
-            cache_hits: Cell::new(self.cache_hits.get()),
-            cache_misses: Cell::new(self.cache_misses.get()),
-            total_query_time_us: Cell::new(self.total_query_time_us.get()),
-        }
-    }
-}
-
-impl Session {
-    /// Create a new session.
-    pub(crate) fn new(
-        indexer: IndexerClient,
-        retriever: RetrieverClient,
-        workspace: WorkspaceClient,
-        events: EventEmitter,
-    ) -> Self {
-        Self {
-            id: Uuid::new_v4(),
-            config: SessionConfig::default(),
-            documents: HashMap::new(),
-            indexer,
-            retriever,
-            workspace,
-            events,
-            stats: SessionStats::default(),
-            created_at: Instant::now(),
-        }
-    }
-
-    /// Create with configuration.
-    pub fn with_config(mut self, config: SessionConfig) -> Self {
-        self.config = config;
-        self
-    }
-
-    /// Get the session ID.
-    pub fn id(&self) -> Uuid {
-        self.id
-    }
-
-    /// Get session age.
-    pub fn age(&self) -> Duration {
-        Instant::now().duration_since(self.created_at)
-    }
-
-    // ============================================================
-    // Document Indexing
-    // ============================================================
-
-    /// Index a document into this session.
-    ///
-    /// The document is indexed, saved to workspace, and cached in this session.
-    ///
-    /// # Arguments
-    ///
-    /// * `ctx` - The index context containing source and options
-    ///
-    /// # Example
-    ///
-    /// ```rust,ignore
-    /// use vectorless::client::IndexContext;
-    /// use vectorless::parser::DocumentFormat;
-    ///
-    /// // From file
-    /// let id1 = session.index(IndexContext::from_path("./doc.md")).await?;
-    ///
-    /// // From content
-    /// let html = "<html><body>Content</body></html>";
-    /// let id2 = session.index(
-    ///     IndexContext::from_content(html, DocumentFormat::Html)
-    /// ).await?;
-    /// ```
-    pub async fn index(&self, ctx: super::IndexContext) -> Result<String> {
-        // Index the document
-        let doc = self.indexer.index(ctx).await?;
-
-        // Save to workspace
-        let persisted = self.indexer.to_persisted(doc);
-        self.workspace.save(&persisted).await?;
-
-        // Cache in session
-        let doc_id = persisted.meta.id.clone();
-
-        info!("Session {}: indexed document {}", self.id, doc_id);
-
-        Ok(doc_id)
-    }
-
-    // ============================================================
-    // Document Querying
-    // ============================================================
-
-    /// Query a document within this session.
-    ///
-    /// Uses the cached tree if available, otherwise loads from workspace.
-    pub async fn query(&self, doc_id: &str, question: &str) -> Result<QueryResult> {
-        self.query_with_options(doc_id, question, RetrieveOptions::default())
-            .await
-    }
-
-    /// Query a document with options.
-    pub async fn query_with_options(
-        &self,
-        doc_id: &str,
-        question: &str,
-        options: RetrieveOptions,
-    ) -> Result<QueryResult> {
-        let start = Instant::now();
-
-        // Get the document tree
-        let tree = self.get_tree(doc_id).await?;
-
-        // Query
-        let mut result = self.retriever.query(&tree, question, &options).await?;
-        result.doc_id = doc_id.to_string();
-
-        // Update stats
-        self.stats.increment_query_count();
-        self.stats.add_query_time(start.elapsed());
-
-        Ok(result)
-    }
-
-    /// Query across all documents in this session.
-    ///
-    /// Searches each document and merges results.
-    pub async fn query_all(&self, question: &str) -> Result<Vec<QueryResult>> {
-        self.query_all_with_options(question, RetrieveOptions::default())
-            .await
-    }
-
-    /// Query across all documents with options.
-    pub async fn query_all_with_options(
-        &self,
-        question: &str,
-        options: RetrieveOptions,
-    ) -> Result<Vec<QueryResult>> {
-        let doc_ids: Vec<String> = self.documents.keys().cloned().collect();
-
-        if doc_ids.is_empty() {
-            return Ok(Vec::new());
-        }
-
-        let mut results = Vec::new();
-
-        for doc_id in &doc_ids {
-            match self
-                .query_with_options(doc_id, question, options.clone())
-                .await
-            {
-                Ok(result) => {
-                    if !result.node_ids.is_empty() {
-                        results.push(result);
-                    }
-                }
-                Err(e) => {
-                    info!("Query failed for {}: {}", doc_id, e);
-                }
-            }
-        }
-
-        // Sort by score descending
-        results.sort_by(|a, b| {
-            b.score
-                .partial_cmp(&a.score)
-                .unwrap_or(std::cmp::Ordering::Equal)
-        });
-
-        Ok(results)
-    }
-
-    // ============================================================
-    // Document Management
-    // ============================================================
-
-    /// Get list of documents in this session.
-    pub fn list_documents(&self) -> Vec<DocumentInfo> {
-        self.documents
-            .values()
-            .map(|ctx| ctx.meta.clone())
-            .collect()
-    }
-
-    /// Get a document tree (from cache or workspace).
-    pub async fn get_tree(&self, doc_id: &str) -> Result<DocumentTree> {
-        // Check cache first
-        if let Some(tree) = self.get_cached_tree(doc_id) {
-            self.stats.increment_cache_hits();
-            return Ok((*tree).clone());
-        }
-
-        self.stats.increment_cache_misses();
-
-        // Load from workspace
-        let doc =
-            self.workspace.load(doc_id).await?.ok_or_else(|| {
-                Error::DocumentNotFound(format!("Document not found: {}", doc_id))
-            })?;
-
-        let tree = doc.tree;
-
-        // Cache for future use
-        self.cache_tree(doc_id, &tree);
-
-        Ok(tree)
-    }
-
-    /// Preload documents into the session cache.
-    ///
-    /// Useful for warming up the cache before querying.
-    pub async fn preload(&self, doc_ids: &[&str]) -> Result<usize> {
-        let mut loaded = 0;
-
-        for doc_id in doc_ids {
-            if self.get_cached_tree(doc_id).is_none() {
-                if let Ok(tree) = self.get_tree(doc_id).await {
-                    self.cache_tree(doc_id, &tree);
-                    loaded += 1;
-                }
-            }
-        }
-
-        info!("Session {}: preloaded {} documents", self.id, loaded);
-        Ok(loaded)
-    }
-
-    /// Remove a document from the session.
-    pub fn remove_document(&self, doc_id: &str) -> bool {
-        // Note: This would need interior mutability for full implementation
-        false
-    }
-
-    /// Clear all documents from the session cache.
-    pub fn clear_cache(&self) {
-        // Note: This would need interior mutability for full implementation
-    }
-
-    // ============================================================
-    // Statistics
-    // ============================================================
-
-    /// Get session statistics.
-    pub fn stats(&self) -> SessionStats {
-        self.stats.clone()
-    }
-
-    /// Get the number of cached documents.
-    pub fn cached_count(&self) -> usize {
-        self.documents.values().filter(|d| d.tree.is_some()).count()
-    }
-
-    // ============================================================
-    // Internal Methods
-    // ============================================================
-
-    /// Cache a document in this session.
-    fn cache_document(&self, doc: crate::client::types::IndexedDocument) {
-        // Note: This would need interior mutability for full implementation
-        // For now, this is a placeholder
-    }
-
-    /// Get a cached tree.
-    fn get_cached_tree(&self, doc_id: &str) -> Option<Arc<DocumentTree>> {
-        self.documents.get(doc_id).and_then(|ctx| ctx.tree.clone())
-    }
-
-    /// Cache a tree.
-    fn cache_tree(&self, doc_id: &str, tree: &DocumentTree) {
-        // Note: This would need interior mutability for full implementation
-    }
-}
-
-impl Clone for Session {
-    fn clone(&self) -> Self {
-        Self {
-            id: self.id,
-            config: self.config.clone(),
-            documents: self.documents.clone(),
-            indexer: self.indexer.clone(),
-            retriever: self.retriever.clone(),
-            workspace: self.workspace.clone(),
-            events: self.events.clone(),
-            stats: self.stats.clone(),
-            created_at: self.created_at,
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_session_config() {
-        let config = SessionConfig::default();
-        assert_eq!(config.max_cached_documents, 100);
-        assert_eq!(config.eviction_policy, EvictionPolicy::Lru);
-    }
-
-    #[test]
-    fn test_session_stats() {
-        let stats = SessionStats::default();
-        stats.cache_hits.set(8);
-        stats.cache_misses.set(2);
-
-        assert!((stats.cache_hit_rate() - 0.8).abs() < 0.01);
-    }
-}
diff --git a/rust/src/client/types.rs b/rust/src/client/types.rs
index 0cfcb065..06a8f287 100644
--- a/rust/src/client/types.rs
+++ b/rust/src/client/types.rs
@@ -213,6 +213,69 @@ impl IndexOptions {
     }
 }
 
+// ============================================================
+// Index Result Types
+// ============================================================
+
+/// Outcome of an indexing call, holding one [`IndexItem`] per indexed document.
+#[derive(Debug, Clone)]
+pub struct IndexResult {
+    /// Indexed items, in the order supplied to [`IndexResult::new`].
+    pub items: Vec<IndexItem>,
+}
+
+impl IndexResult {
+    /// Create an index result that takes ownership of `items` as given.
+    pub fn new(items: Vec<IndexItem>) -> Self {
+        Self { items }
+    }
+
+    /// Document ID when exactly one item is present; `None` for zero or several items.
+    pub fn doc_id(&self) -> Option<&str> {
+        if self.items.len() == 1 {
+            Some(&self.items[0].doc_id)
+        } else {
+            None
+        }
+    }
+
+    /// Returns `true` when `items` holds no entries.
+    pub fn is_empty(&self) -> bool {
+        self.items.is_empty()
+    }
+
+    /// Number of entries in `items`.
+    pub fn len(&self) -> usize {
+        self.items.len()
+    }
+}
+
+/// Identifying metadata for one indexed document inside an [`IndexResult`].
+#[derive(Debug, Clone)]
+pub struct IndexItem {
+    /// Unique document ID; surfaced by [`IndexResult::doc_id`] for single-item results.
+    pub doc_id: String,
+    /// The document name.
+    pub name: String,
+    /// The document format.
+    pub format: DocumentFormat,
+}
+
+impl IndexItem {
+    /// Create an index item; `doc_id` and `name` accept any `Into<String>` value.
+    pub fn new(
+        doc_id: impl Into<String>,
+        name: impl Into<String>,
+        format: DocumentFormat,
+    ) -> Self {
+        Self {
+            doc_id: doc_id.into(),
+            name: name.into(),
+            format,
+        }
+    }
+}
+
 // ============================================================
 // Query Types
 // ============================================================
@@ -364,4 +427,32 @@ mod tests {
         assert_eq!(info.id, "doc-1");
         assert_eq!(info.format, "markdown");
     }
+
+    #[test]
+    fn test_index_result() {
+        let item = IndexItem::new("doc-1", "Test", DocumentFormat::Markdown);
+        let result = IndexResult::new(vec![item]);
+        // Exactly one item: `doc_id()` yields that item's ID.
+        assert_eq!(result.doc_id(), Some("doc-1"));
+        assert_eq!(result.len(), 1);
+        assert!(!result.is_empty());
+    }
+
+    #[test]
+    fn test_index_result_empty() {
+        let result = IndexResult::new(vec![]);
+        assert!(result.is_empty());
+        assert_eq!(result.doc_id(), None); // zero items: no single ID to report
+    }
+
+    #[test]
+    fn test_index_result_multiple() {
+        let items = vec![
+            IndexItem::new("doc-1", "A", DocumentFormat::Markdown),
+            IndexItem::new("doc-2", "B", DocumentFormat::Pdf),
+        ];
+        let result = IndexResult::new(items);
+        assert_eq!(result.len(), 2);
+        assert_eq!(result.doc_id(), None); // two items: a single ID would be ambiguous
+    }
 }
diff --git a/rust/src/client/workspace.rs b/rust/src/client/workspace.rs
index 0b880ba6..7e9cc6a4 100644
--- a/rust/src/client/workspace.rs
+++ b/rust/src/client/workspace.rs
@@ -43,7 +43,7 @@ use super::types::DocumentInfo;
 /// The client is fully thread-safe and can be cloned cheaply
 /// (it uses `Arc` internally).
 #[derive(Clone)]
-pub struct WorkspaceClient {
+pub(crate) struct WorkspaceClient {
     /// Workspace storage.
     workspace: Arc<Workspace>,
 
@@ -56,7 +56,7 @@ pub struct WorkspaceClient {
 
 /// Workspace client configuration.
 #[derive(Debug, Clone)]
-pub struct WorkspaceClientConfig {
+pub(crate) struct WorkspaceClientConfig {
     /// Auto-save interval in seconds (None = disabled).
     pub auto_save_interval: Option<u64>,
 
@@ -296,7 +296,7 @@ impl WorkspaceClient {
 
 /// Workspace statistics.
 #[derive(Debug, Clone)]
-pub struct WorkspaceStats {
+pub(crate) struct WorkspaceStats {
     /// Number of documents in the workspace.
     pub document_count: usize,
 }
diff --git a/rust/src/config/mod.rs b/rust/src/config/mod.rs
index 22c56cfe..4fa305a3 100644
--- a/rust/src/config/mod.rs
+++ b/rust/src/config/mod.rs
@@ -1,55 +1,10 @@
 // Copyright (c) 2026 vectorless developers
 // SPDX-License-Identifier: Apache-2.0
 
-//! Configuration management for vectorless.
+//! Internal configuration management.
 //!
-//! This module provides comprehensive configuration loading, validation,
-//! and management:
-//!
-//! - [`Config`] — Main configuration structure
-//! - [`ConfigLoader`] — Load configuration from TOML files
-//! - [`ConfigValidator`] — Validate configuration values
-//! - [`ConfigDocs`] — Generate configuration documentation
-//!
-//! # Quick Start
-//!
-//! ```rust,no_run
-//! use vectorless::config::{Config, ConfigLoader};
-//!
-//! // Load from file
-//! let config = ConfigLoader::new()
-//!     .file("vectorless.toml")
-//!     .with_validation(true)
-//!     .load()?;
-//!
-//! // Or use defaults
-//! let config = Config::default();
-//! # Ok::<(), vectorless::config::ConfigError>(())
-//! ```
-//!
-//! # Layered Configuration
-//!
-//! Multiple configuration files can be layered:
-//!
-//! ```rust,no_run
-//! use vectorless::config::ConfigLoader;
-//!
-//! let config = ConfigLoader::new()
-//!     .file("default.toml")        // Base defaults
-//!     .file("production.toml")     // Production overrides
-//!     .with_validation(true)
-//!     .load()?;
-//! # Ok::<(), vectorless::config::ConfigError>(())
-//! ```
-//!
-//! # Configuration Sections
-//!
-//! - `[llm]` — Unified LLM configuration (pool, retry, throttle, fallback)
-//! - `[metrics]` — Unified metrics configuration
-//! - `[pilot]` — Pilot navigation configuration
-//! - `[indexer]` — Document indexing parameters
-//! - `[retrieval]` — Retrieval model settings
-//! - `[storage]` — Storage paths
+//! Users configure vectorless via [`EngineBuilder`](crate::client::EngineBuilder) methods,
+//! not by directly interacting with this module.
 
 mod docs;
 mod loader;
@@ -57,50 +12,12 @@ mod merge;
 mod types;
 mod validator;
 
-// Re-export main types
-pub use docs::ConfigDocs;
-pub use loader::{CONFIG_FILE_NAMES, ConfigError, ConfigLoader, find_config_file};
-pub use merge::{ConfigOverlay, Merge, MergeStrategy};
-pub use types::{
-    CacheConfig,
-    CompressionAlgorithm,
-    CompressionConfig,
-    // Concurrency
-    ConcurrencyConfig,
-    // Main config
-    Config,
-    // Validation
-    ConfigValidationError,
-    // Content aggregator
-    ContentAggregatorConfig,
-    // Fallback
-    FallbackBehavior,
-    FallbackConfig,
-    // Indexer
-    IndexerConfig,
-    // LLM configs
-    LlmClientConfig,
-    LlmConfig,
-    LlmFallbackBehavior,
-    LlmFallbackConfig,
-    LlmMetricsConfig,
-    LlmOnAllFailedBehavior,
-    LlmPoolConfig,
-    MetricsConfig,
-    OnAllFailedBehavior,
-    PilotMetricsConfig,
-    // Retrieval configs
-    RetrievalConfig,
-    RetrievalMetricsConfig,
-    RetryConfig,
-    SearchConfig,
-    Severity,
-    // Storage and sufficiency
-    StorageConfig,
-    StrategyConfig,
-    SufficiencyConfig,
-    SummaryConfig,
-    ThrottleConfig,
-    ValidationError,
+pub(crate) use loader::{ConfigError, ConfigLoader};
+pub(crate) use types::{
+    CacheConfig, CompressionAlgorithm, CompressionConfig, ConcurrencyConfig, Config,
+    ConfigValidationError, ContentAggregatorConfig, FallbackBehavior, FallbackConfig,
+    IndexerConfig, LlmClientConfig, LlmConfig, LlmFallbackBehavior, LlmFallbackConfig,
+    LlmMetricsConfig, LlmPoolConfig, MetricsConfig, OnAllFailedBehavior, PilotMetricsConfig,
+    RetrievalConfig, RetrievalMetricsConfig, RetryConfig, SearchConfig, Severity, StorageConfig,
+    StrategyConfig, SufficiencyConfig, SummaryConfig, ThrottleConfig, ValidationError,
 };
-pub use validator::{ConfigValidator, ValidationRule};
diff --git a/rust/src/config/types/mod.rs b/rust/src/config/types/mod.rs
index ab397188..c60763ef 100644
--- a/rust/src/config/types/mod.rs
+++ b/rust/src/config/types/mod.rs
@@ -18,21 +18,21 @@ mod storage;
 
 use serde::{Deserialize, Serialize};
 
-pub use concurrency::ConcurrencyConfig;
-pub use content::ContentAggregatorConfig;
-pub use fallback::{FallbackBehavior, FallbackConfig, OnAllFailedBehavior};
-pub use indexer::IndexerConfig;
-pub use llm::{LlmConfig, SummaryConfig};
-pub use llm_pool::{
+pub(crate) use concurrency::ConcurrencyConfig;
+pub(crate) use content::ContentAggregatorConfig;
+pub(crate) use fallback::{FallbackBehavior, FallbackConfig, OnAllFailedBehavior};
+pub(crate) use indexer::IndexerConfig;
+pub(crate) use llm::{LlmConfig, SummaryConfig};
+pub(crate) use llm_pool::{
     FallbackBehavior as LlmFallbackBehavior, FallbackConfig as LlmFallbackConfig,
     LlmClientConfig, LlmPoolConfig, OnAllFailedBehavior as LlmOnAllFailedBehavior, RetryConfig,
     ThrottleConfig,
 };
-pub use metrics::{
+pub(crate) use metrics::{
     LlmMetricsConfig, MetricsConfig, PilotMetricsConfig, RetrievalMetricsConfig,
 };
-pub use retrieval::{RetrievalConfig, SearchConfig};
-pub use storage::{
+pub(crate) use retrieval::{RetrievalConfig, SearchConfig};
+pub(crate) use storage::{
     CacheConfig, CompressionAlgorithm, CompressionConfig, StorageConfig, StrategyConfig,
     SufficiencyConfig,
 };
diff --git a/rust/src/index/mod.rs b/rust/src/index/mod.rs
index 6072e255..395325fe 100644
--- a/rust/src/index/mod.rs
+++ b/rust/src/index/mod.rs
@@ -63,5 +63,4 @@ pub use summary::{
 // Re-export incremental
 pub use incremental::{ChangeDetector, ChangeSet, PartialUpdater};
 
-// Re-export config types from crate config
-pub use crate::config::{ConcurrencyConfig, IndexerConfig};
+pub(crate) use crate::config::{ConcurrencyConfig, IndexerConfig};
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index d2ea3eac..34579e3e 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -14,160 +14,30 @@
 
 //! # Vectorless
 //!
-//! **A hierarchical, reasoning-native document intelligence engine.**
+//! An ultra-performant, reasoning-native document intelligence engine for AI.
 //!
-//! Replace your vector database with LLM-powered tree navigation.
-//! No embeddings. No vector search. Just reasoning.
-//!
-//! ## Overview
-//!
-//! Traditional RAG systems chunk documents into flat vectors, losing structure.
-//! Vectorless preserves your document's hierarchy and uses an LLM to navigate it —
-//! like a human skimming a table of contents, then drilling into relevant sections.
-//!
-//! ## Architecture
-//!
-//! ```text
-//!                              ┌─────────────────────────────────────────────────┐
-//!                              │                    USER                          │
-//!                              │              (Query / Index)                     │
-//!                              └────────────────────────┬────────────────────────┘
-//!                                                       │
-//!                                                       ▼
-//! ┌─────────────────────────────────────────────────────────────────────────────────┐
-//! │                              CLIENT LAYER                                        │
-//! │  ┌───────────────────────────────────────────────────────────────────────────┐  │
-//! │  │                           Engine / EngineBuilder                            │  │
-//! │  │                    (Unified API for Index + Query)                          │  │
-//! │  └───────────────────────────────────────────────────────────────────────────┘  │
-//! └─────────────────────────────────────────────────────────────────────────────────┘
-//!                                                       │
-//!                              ┌────────────────────────┴────────────────────────┐
-//!                              │                                                 │
-//!                              ▼                                                 ▼
-//! ┌──────────────────────────────────────────────┐   ┌──────────────────────────────────────────────┐
-//! │              INDEX PIPELINE                   │   │              RETRIEVAL ENGINE                │
-//! │  ┌─────────┐  ┌─────────┐  ┌─────────────┐   │   │  ┌─────────────────────────────────────┐    │
-//! │  │  Parse  │─▶│  Build  │─▶│   Enhance   │   │   │  │           Pilot (LLM)               │    │
-//! │  │ (Doc)   │  │ (Tree)  │  │ (Summaries) │   │   │  │     ┌───────────────────────┐       │    │
-//! │  └─────────┘  └────┬────┘  └──────┬──────┘   │   │  │     │   Navigation Agent    │       │    │
-//! │       │            │              │          │   │  │     │  ┌─────┐ ┌─────────┐  │       │    │
-//! │       ▼            ▼              ▼          │   │  │     │  │Decide│▶│Traverse │  │       │    │
-//! │  ┌─────────┐  ┌─────────┐  ┌─────────────┐   │   │  │     │  │Path │ │  Tree   │  │       │    │
-//! │  │ Enrich  │─▶│ Optimize│─▶│   Persist   │   │   │  │     │  └─────┘ └─────────┘  │       │    │
-//! │  │(Meta)   │  │ (Tree)  │  │  (Storage)  │   │   │  │     └───────────────────────┘       │    │
-//! │  └─────────┘  └─────────┘  └─────────────┘   │   │  └─────────────────────────────────────┘    │
-//! │       │                                        │   │                    │                        │
-//! │       │         ┌──────────────────────┐      │   │                    ▼                        │
-//! │       └────────▶│   Change Detector    │◀─────┼───┤  ┌─────────────────────────────────────┐    │
-//! │                  │  (Fingerprint-based) │      │   │  │         Context Assembler           │    │
-//! │                  └──────────────────────┘      │   │  │   ┌─────────┐ ┌─────────────────┐  │    │
-//! │                                                │   │  │   │ Pruning │ │  Token Budget   │  │    │
-//! └──────────────────────────────────────────────┘   │  │   │Strategy │ │    Management   │  │    │
-//!                              │                     │  │   └─────────┘ └─────────────────┘  │    │
-//!                              │                     │  └─────────────────────────────────────┘    │
-//!                              │                     │                    │                        │
-//!                              ▼                     │                    ▼                        │
-//! ┌──────────────────────────────────────────────────────────────────────────────────────────────┐
-//! │                                  DOMAIN LAYER (Core)                                          │
-//! │                                                                                               │
-//! │   ┌───────────────────┐     ┌───────────────────┐     ┌───────────────────┐                   │
-//! │   │   DocumentTree    │     │    TreeNode       │     │    NodeId         │                   │
-//! │   │   (Arena-based)   │────▶│  - title          │     │   (indextree)     │                   │
-//! │   │                   │     │  - content        │     │                   │                   │
-//! │   └───────────────────┘     │  - summary        │     └───────────────────┘                   │
-//! │            │                │  - depth          │              │                              │
-//! │            ▼                │  - token_count    │              │                              │
-//! │   ┌───────────────────┐     └───────────────────┘              │                              │
-//! │   │     TocView       │              │                         │                              │
-//! │   │  (Table of        │              │                         │                              │
-//! │   │   Contents)       │              │                         │                              │
-//! │   └───────────────────┘              │                         │                              │
-//! └──────────────────────────────────────────────────────────────────────────────────────────────┘
-//!                                        │                         │
-//!                      ┌─────────────────┴─────────────────────────┴─────────────────┐
-//!                      │                                                               │
-//!                      ▼                                                               ▼
-//! ┌─────────────────────────────────────────┐   ┌─────────────────────────────────────────────────┐
-//! │            SUPPORT LAYER                 │   │                 STORAGE LAYER                   │
-//! │                                          │   │                                                  │
-//! │  ┌─────────────┐  ┌──────────────────┐   │   │  ┌────────────────┐  ┌─────────────────────┐    │
-//! │  │    LLM      │  │     Parser       │   │   │  │   Workspace    │  │    MemoStore        │    │
-//! │  │  (OpenAI)   │  │ - Markdown       │   │   │  │  (Persistence) │  │  (LLM Cache)        │    │
-//! │  │             │  │ - PDF            │   │   │  │                │  │  - LRU Eviction     │    │
-//! │  │ ┌─────────┐ │  │ - DOCX           │   │   │  │ ┌────────────┐ │  │  - TTL Expiration   │    │
-//! │  │ │  Pool   │ │  │                  │   │   │  │ │   LRU     │ │  │  - Disk Persist     │    │
-//! │  │ │ Retry   │ │  └──────────────────┘   │   │  │ │   Cache   │ │  │                      │    │
-//! │  │ │ Fallback│ │                          │   │  │ └────────────┘ │  └─────────────────────┘    │
-//! │  │ └─────────┘ │  ┌──────────────────┐   │   │  │                │                               │
-//! │  └─────────────┘  │   Fingerprint    │   │   │  │ ┌────────────┐ │  ┌─────────────────────┐    │
-//! │                    │   (BLAKE2b)      │   │   │  │ │  Atomic    │ │  │   ChangeDetector    │    │
-//! │  ┌─────────────┐  │                  │   │   │  │ │  Writes    │ │  │   (Incremental)     │    │
-//! │  │   Config    │  │ ┌──────────────┐ │   │   │  │ └────────────┘ │  │                     │    │
-//! │  │   Loader    │  │ │ Content FP   │ │   │   │  │                │  │ ┌─────────────────┐ │    │
-//! │  │             │  │ │ Subtree FP   │ │   │   │  └────────────────┘  │ │ Processing Ver  │ │    │
-//! │  └─────────────┘  │ │ Node FP      │ │   │   │                      │ └─────────────────┘ │    │
-//! │                    │ └──────────────┘ │   │   │                      └─────────────────────┘    │
-//! │  ┌─────────────┐  └──────────────────┘   │   │                                                  │
-//! │  │  Throttle   │                          │   │  ┌────────────────────────────────────────────┐ │
-//! │  │ (Rate Limit)│  ┌──────────────────┐   │   │  │              DocumentMeta                  │ │
-//! │  └─────────────┘  │   Throttle       │   │   │  │  - content_fingerprint                     │ │
-//! │                    │   (Concurrency)  │   │   │  │  - processing_version                      │ │
-//! │                    └──────────────────┘   │   │  │  - node_count, total_summary_tokens        │ │
-//! │                                          │   │  └────────────────────────────────────────────┘ │
-//! └─────────────────────────────────────────┘   └─────────────────────────────────────────────────┘
-//! ```
-//!
-//! ## Data Flow
-//!
-//! ### Indexing Flow
-//! ```text
-//! Document ──▶ Parse ──▶ Build Tree ──▶ Generate Summaries ──▶ Detect Changes ──▶ Persist
-//!                            │                │                      │
-//!                            │                └──▶ MemoStore ◀───────┘
-//!                            │                      (Cache)
-//!                            └──▶ Fingerprint ──▶ ChangeDetector
-//! ```
-//!
-//! ### Query Flow
-//! ```text
-//! Query ──▶ Pilot Agent ──▶ Navigate Tree ──▶ Assemble Context ──▶ Return Result
-//!               │                 │                   │
-//!               └──▶ LLM ◀────────┘                   │
-//!                    (Decide)                         │
-//!                                                     └──▶ MemoStore (Cached Summaries)
-//! ```
-//!
-//! ## Features
-//!
-//! - 🌳 **Tree-Based Indexing** — Documents as hierarchical trees, not flat chunks
-//! - 🧠 **LLM Navigation** — Reasoning-based traversal to find relevant content
-//! - 🚀 **Zero Infrastructure** — No vector database, no embedding models
-//! - 📄 **Multi-Format** — Markdown, PDF, DOCX support
-//! - 💾 **Persistent Workspace** — LRU-cached storage with lazy loading
-//! - 🔄 **Retry & Fallback** — Resilient LLM calls with automatic recovery
-//! - 🔍 **Incremental Updates** — Fingerprint-based change detection
-//! - ⚡ **LLM Memoization** — Cache summaries and decisions to reduce costs
+//! It transforms documents into rich semantic trees and uses LLMs to
+//! intelligently traverse the hierarchy — retrieving the most relevant content
+//! through structural reasoning and deep contextual understanding.
 //!
 //! ## Quick Start
 //!
 //! ```rust,no_run
-//! use vectorless::{EngineBuilder, Engine};
-//! use vectorless::client::IndexContext;
+//! use vectorless::{EngineBuilder, IndexContext, QueryContext};
 //!
 //! #[tokio::main]
 //! async fn main() -> Result<(), Box<dyn std::error::Error>> {
-//!     // Create client
 //!     let client = EngineBuilder::new()
 //!         .with_workspace("./workspace")
 //!         .build()
 //!         .await?;
 //!
-//!     // Index a document
-//!     let doc_id = client.index(IndexContext::from_path("./document.md")).await?;
+//!     let result = client.index(IndexContext::from_path("./document.md")).await?;
+//!     let doc_id = result.doc_id().unwrap();
 //!
-//!     // Query with natural language
-//!     let result = client.query(&doc_id, "What is this about?").await?;
+//!     let result = client.query(
+//!         QueryContext::new("What is this about?").with_doc_id(doc_id)
+//!     ).await?;
 //!     println!("{}", result.content);
 //!
 //!     Ok(())
@@ -178,44 +48,28 @@
 //!
 //! | Module | Description |
 //! |--------|-------------|
-//! | [`client`] | High-level API (`Engine`, `EngineBuilder`) |
+//! | [`client`] | High-level API (`Engine`, `EngineBuilder`, `IndexContext`, `QueryContext`) |
 //! | [`document`] | Core domain types (`DocumentTree`, `TreeNode`, `NodeId`) |
-//! | [`index`] | Document indexing pipeline with incremental updates |
-//! | [`retrieval`] | Retrieval strategies and LLM-based navigation |
-//! | [`config`] | Configuration management |
-//! | [`llm`] | LLM client with retry & fallback |
-//! | [`parser`] | Document parsers (Markdown, PDF, DOCX) |
-//! | [`storage`] | Workspace persistence with LRU caching |
-//! | [`throttle`] | Rate limiting and concurrency control |
-//! | [`fingerprint`] | Content and subtree fingerprinting |
-//! | [`memo`] | LLM result memoization and caching |
-
-// =============================================================================
-// Modules
-// =============================================================================
+//! | [`error`] | Error types |
 
 pub mod client;
-pub mod config;
+mod config;
 pub mod document;
 pub mod error;
-pub mod index;
-pub mod llm;
-pub mod memo;
-pub mod metrics;
-pub mod parser;
-pub mod retrieval;
-pub mod storage;
-pub mod throttle;
-pub mod utils;
-
-// =============================================================================
-// Re-exports (Convenience API)
-// =============================================================================
-
-// Client API (most common entry point)
+mod index;
+mod llm;
+mod memo;
+mod metrics;
+mod parser;
+mod retrieval;
+mod storage;
+mod throttle;
+mod utils;
+
+// Client API
 pub use client::{
-    BuildError, DocumentInfo, Engine, EngineBuilder, IndexContext, IndexMode, IndexOptions,
-    IndexSource, IndexedDocument,
+    BuildError, ClientError, DocumentFormat, DocumentInfo, Engine, EngineBuilder, EventEmitter,
+    IndexContext, IndexItem, IndexMode, IndexOptions, IndexResult, QueryContext, QueryResult,
 };
 
 // Error types
@@ -227,42 +81,3 @@ pub use document::{
     TreeNode,
 };
 
-// Utility functions
-pub use utils::{estimate_tokens, estimate_tokens_fast};
-
-// Configuration
-pub use config::{Config, ConfigLoader, RetrievalConfig, SummaryConfig};
-
-// LLM
-pub use llm::{LlmClient, LlmConfig, LlmConfigs, LlmError, LlmPool, RetryConfig};
-
-// Document parsing
-pub use parser::{
-    DocumentFormat, DocumentParser, DocxParser, MarkdownParser, ParseResult, PdfParser, RawNode,
-};
-
-// Indexing
-pub use index::pipeline::{CustomStageBuilder, PipelineOrchestrator};
-pub use index::{
-    ChangeDetector, ChangeSet, IndexContext as PipelineIndexContext, IndexInput, IndexMetrics,
-    IndexMode as PipelineIndexMode, IndexResult, IndexStage, PartialUpdater, PipelineExecutor,
-    PipelineOptions, SummaryStrategy,
-};
-
-// Retrieval
-pub use retrieval::{
-    ContextBuilder, NavigationDecision, NavigationStep, PipelineRetriever, PruningStrategy,
-    QueryComplexity, RetrievalContext, RetrievalResult, RetrieveEvent, RetrieveOptions,
-    RetrieveResponse, Retriever, RetrieverError, RetrieverResult, SearchPath, StrategyPreference,
-    SufficiencyLevel, TokenEstimation, format_for_llm, format_for_llm_async, format_tree_for_llm,
-    format_tree_for_llm_async,
-};
-
-// Storage
-pub use storage::{DocumentMeta as StorageDocumentMeta, PersistedDocument, Workspace};
-
-// Throttle
-pub use throttle::{ConcurrencyConfig, ConcurrencyController, RateLimiter};
-
-// Memo
-pub use memo::{MemoEntry, MemoKey, MemoOpType, MemoStats, MemoStore, MemoValue};
diff --git a/rust/src/metrics/mod.rs b/rust/src/metrics/mod.rs
index b190fcbc..e60a1103 100644
--- a/rust/src/metrics/mod.rs
+++ b/rust/src/metrics/mod.rs
@@ -59,5 +59,4 @@ pub use llm::{LlmMetrics, LlmMetricsReport};
 pub use pilot::{InterventionPoint, PilotMetrics, PilotMetricsReport};
 pub use retrieval::{RetrievalMetrics, RetrievalMetricsReport};
 
-// Re-export config from config module
-pub use crate::config::MetricsConfig;
+pub(crate) use crate::config::MetricsConfig;