diff --git a/README.md b/README.md
index 2ee4b516..9d2b6e88 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-

+
Reasoning-native Document Intelligence Engine
@@ -43,7 +43,8 @@ from vectorless import Engine, IndexContext
engine = Engine(workspace="./data")
# Index a document (PDF, Markdown, DOCX, HTML)
-doc_id = engine.index(IndexContext.from_file("./report.pdf"))
+result = engine.index(IndexContext.from_file("./report.pdf"))
+doc_id = result.doc_id
# Query
result = engine.query(doc_id, "What is the total revenue?")
@@ -60,7 +61,7 @@ vectorless = "0.1"
```
```rust
-use vectorless::client::{Engine, EngineBuilder, IndexContext};
+use vectorless::client::{EngineBuilder, IndexContext, QueryContext};
#[tokio::main]
async fn main() -> vectorless::Result<()> {
@@ -70,10 +71,13 @@ async fn main() -> vectorless::Result<()> {
.await?;
// Index
- let doc_id = engine.index(IndexContext::from_path("./report.pdf")).await?;
+ let result = engine.index(IndexContext::from_path("./report.pdf")).await?;
+ let doc_id = result.doc_id().unwrap();
// Query
- let result = engine.query(&doc_id, "What is the total revenue?").await?;
+ let result = engine.query(
+ QueryContext::new("What is the total revenue?").with_doc_id(doc_id)
+ ).await?;
println!("Answer: {}", result.content);
Ok(())
diff --git a/docs/design/with-title.png b/docs/design/with-title.png
new file mode 100644
index 00000000..9f8a6c34
Binary files /dev/null and b/docs/design/with-title.png differ
diff --git a/examples/rust/advanced.rs b/examples/rust/advanced.rs
index 946f619b..bc89d756 100644
--- a/examples/rust/advanced.rs
+++ b/examples/rust/advanced.rs
@@ -16,7 +16,7 @@
//! cargo run --example advanced
//! ```
-use vectorless::{Engine, IndexContext};
+use vectorless::{EngineBuilder, IndexContext, QueryContext};
#[tokio::main]
async fn main() -> vectorless::Result<()> {
@@ -24,7 +24,7 @@ async fn main() -> vectorless::Result<()> {
// Method 1: Use explicit config file path
// This loads all settings from the specified config file
- let client = Engine::builder()
+ let client = EngineBuilder::new()
.with_config_path("./config.toml") // or "./my_vectorless.toml"
.build()
.await
@@ -33,11 +33,14 @@ async fn main() -> vectorless::Result<()> {
println!("✓ Client created with config file\n");
// Index a document
- let doc_id = client.index(IndexContext::from_path("./README.md")).await?;
+ let result = client.index(IndexContext::from_path("./README.md")).await?;
+ let doc_id = result.doc_id().unwrap().to_string();
println!("✓ Indexed: {}\n", doc_id);
// Query
- let result = client.query(&doc_id, "What features does Vectorless provide?").await?;
+ let result = client
+ .query(QueryContext::new("What features does Vectorless provide?").with_doc_id(&doc_id))
+ .await?;
println!("Query: What features does Vectorless provide?");
println!("Score: {:.2}", result.score);
if !result.content.is_empty() {
@@ -55,7 +58,7 @@ async fn main() -> vectorless::Result<()> {
println!(" 2. Auto-detected config file (vectorless.toml, config.toml, .vectorless.toml)");
println!(" 3. Explicit config file (with_config_path)");
println!(" 4. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)");
- println!(" 5. Builder methods (with_openai, with_model, etc.)");
+ println!(" 5. Builder methods (with_key, with_model, with_endpoint)");
println!();
println!("Environment Variables:");
println!(" OPENAI_API_KEY - LLM API key");
diff --git a/examples/rust/basic.rs b/examples/rust/basic.rs
index 7064d889..5d5df2bd 100644
--- a/examples/rust/basic.rs
+++ b/examples/rust/basic.rs
@@ -3,44 +3,46 @@
//! Basic usage example for Vectorless.
//!
-//! This example demonstrates the core API in ~30 lines.
-//!
//! # Usage
//!
//! ```bash
//! cargo run --example basic
//! ```
-use vectorless::{Engine, IndexContext};
+use vectorless::{EngineBuilder, IndexContext, QueryContext};
#[tokio::main]
async fn main() -> vectorless::Result<()> {
println!("=== Vectorless Basic Example ===\n");
- // 1. Create a client
- let client = Engine::builder()
+ // 1. Create an engine
+ let engine = EngineBuilder::new()
.with_workspace("./workspace")
.build()
.await
.map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?;
- println!("✓ Client created\n");
+ println!("Engine created\n");
// 2. Index a document
- let doc_id = client.index(IndexContext::from_path("./README.md")).await?;
- println!("✓ Indexed: {}\n", doc_id);
+ let result = engine.index(IndexContext::from_path("./README.md")).await?;
+ let doc_id = result.doc_id().unwrap().to_string();
+ println!("Indexed: {}\n", doc_id);
// 3. List documents
println!("Documents:");
- for doc in client.list_documents().await? {
+ for doc in engine.list().await? {
println!(" - {} ({})", doc.name, doc.id);
}
println!();
// 4. Query
- match client.query(&doc_id, "What is vectorless?").await {
+ match engine
+ .query(QueryContext::new("What is vectorless?").with_doc_id(&doc_id))
+ .await
+ {
Ok(result) => {
- println!("Query score: {:.2}", result.score);
+ println!("Score: {:.2}", result.score);
if !result.content.is_empty() {
let preview: String = result.content.chars().take(150).collect();
println!("Result: {}...", preview);
@@ -50,14 +52,9 @@ async fn main() -> vectorless::Result<()> {
}
println!();
- // 5. Clone for concurrent use (client is Clone + Send + Sync)
- let _client1 = client.clone();
- let _client2 = client.clone();
- println!("✓ Client cloned for concurrent use\n");
-
- // 6. Cleanup
- client.remove(&doc_id).await?;
- println!("✓ Removed: {}", doc_id);
+ // 5. Cleanup
+ engine.remove(&doc_id).await?;
+ println!("Removed: {}", doc_id);
println!("\n=== Done ===");
Ok(())
diff --git a/examples/rust/batch_processing.rs b/examples/rust/batch_processing.rs
deleted file mode 100644
index 1e0d11ee..00000000
--- a/examples/rust/batch_processing.rs
+++ /dev/null
@@ -1,1156 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Batch document processing example.
-//!
-//! This example demonstrates how to efficiently process
-//! multiple documents in batch mode using sessions.
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example batch_processing
-//! ```
-
-use vectorless::client::{EngineBuilder, IndexContext};
-
-#[tokio::main]
-async fn main() -> Result<(), Box<dyn std::error::Error>> {
- println!("=== Batch Document Processing Example ===\n");
-
- // 1. Create engine and session
- println!("Step 1: Setting up...");
- let engine = EngineBuilder::new()
- .with_workspace("./workspace_batch_example")
- .build()
- .await
- .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?;
-
- let session = engine.session().await;
- println!(" ✓ Session created: {}\n", session.id());
-
- // 2. Create sample documents
- println!("Step 2: Creating sample documents...");
- let temp_dir = tempfile::tempdir()?;
-
- let documents = vec![
- (
- "intro.md",
- r#"# Introduction
-
-Welcome to the vectorless library. This is a document intelligence engine.
-
-## Features
-
-- Tree-based navigation
-- Multi-format support
-- Session management
-"#,
- ),
- (
- "api.md",
- r#"# API Reference
-
-## Engine
-
-The main client for document operations.
-
-### Methods
-
-- `index(path)`: Index a document
-- `query(question)`: Query indexed content
-
-## Session
-
-Multi-document operations with caching.
-
-### Methods
-
-- `index(path)`: Index into session
-- `query_all(question)`: Query across all documents
-"#,
- ),
- (
- "guide.md",
- r#"# User Guide
-
-## Getting Started
-
-First, create a client with workspace configuration.
-
-## Best Practices
-
-- Use sessions for multi-document operations
-- Enable caching for better performance
-- Monitor events for debugging
-"#,
- ),
- (
- "advanced.md",
- r#"# Advanced Topics
-
-## Performance Tuning
-
-Configure retrieval parameters for optimal performance.
-
-### Parameters
-
-- `top_k`: Number of results
-- `max_tokens`: Token budget
-
-## Custom Pilots
-
-Implement custom navigation logic.
-"#,
- ),
- (
- "reference.md",
- r#"# Reference
-
-## Configuration
-
-All configuration is done via TOML files.
-
-### Example
-
-```toml
-[retrieval]
-top_k = 5
-max_tokens = 4000
-```
-"#,
- ),
- (
- "examples.md",
- r#"# Examples
-
-## Basic Usage
-
-Simple indexing and querying example.
-
-## Batch Processing
-
-Process multiple documents concurrently.
-
-## Session Usage
-
-Multi-document operations with caching.
-"#,
- ),
- (
- "faq.md",
- r#"# FAQ
-
-## Common Questions
-
-**Q: How do I index a document?**
-A: Use `engine.index(path)` method.
-
-**Q: How to query?**
-A: Use `engine.query(doc_id, question)` method.
-
-**Q: What formats are supported?**
-A: Markdown, PDF, DOCX, HTML.
-"#,
- ),
- (
- "changelog.md",
- r#"# Changelog
-
-## Version 0.1.0
-
-- Initial release
-- Basic indexing support
-- Simple retrieval
-
-## Version 0.2.0
-
-- Session support
-- Event system
-- Content aggregator
-"#,
- ),
- (
- "contributing.md",
- r#"# Contributing
-
-## How to Contribute
-
-We welcome contributions! Please follow these steps:
-
-1. Fork the repository
-2. Create a feature branch
-3. Submit a pull request
-
-## Code Style
-
-- Run `cargo fmt`
-- Run `cargo clippy`
-- Add tests
-"#,
- ),
- (
- "license.md",
- r#"# License
-
-Apache License, Version 2.0
-
-Copyright 2026 vectorless developers
-"#,
- ),
- (
- "architecture.md",
- r#"# Architecture
-
-## Overview
-
-Vectorless uses a tree-based architecture.
-
-## Components
-
-- Parser: Document parsing
-- Indexer: Tree building
-- Retriever: Content search
-- Storage: Persistence
-"#,
- ),
- (
- "security.md",
- r#"# Security
-
-## Security Considerations
-
-- API keys are stored securely
-- No sensitive data in logs
-- Input validation
-
-## Best Practices
-
-- Use environment variables
-- Rotate keys periodically
-"#,
- ),
- (
- "performance.md",
- r#"# Performance
-
-## Optimization Tips
-
-- Use caching effectively
-- Configure appropriate batch sizes
-- Monitor memory usage
-
-## Benchmarks
-
-Run `cargo bench` for performance metrics.
-"#,
- ),
- (
- "testing.md",
- r#"# Testing
-
-## Running Tests
-
-```bash
-cargo test
-```
-
-## Test Coverage
-
-- Unit tests
-- Integration tests
-- Example tests
-"#,
- ),
- (
- "deployment.md",
- r#"# Deployment
-
-## Production Setup
-
-- Configure workspace directory
-- Set up logging
-- Monitor performance
-
-## Configuration
-
-Use TOML configuration files.
-"#,
- ),
- (
- "troubleshooting.md",
- r#"# Troubleshooting
-
-## Common Issues
-
-### Indexing Fails
-
-Check file format and permissions.
-
-### Query Returns Empty
-
-Ensure document is indexed.
-
-### Performance Issues
-
-Reduce batch size or enable caching.
-"#,
- ),
- (
- "integrations.md",
- r#"# Integrations
-
-## LLM Providers
-
-- OpenAI
-- Anthropic
-- Local models
-
-## Storage Backends
-
-- File system (default)
-- S3 (planned)
-"#,
- ),
- (
- "migrations.md",
- r#"# Migrations
-
-## Version Migrations
-
-### 0.1.x to 0.2.x
-
-- Update configuration format
-- Re-index documents
-"#,
- ),
- (
- "roadmap.md",
- r#"# Roadmap
-
-## Future Plans
-
-### Short Term
-
-- Streaming support
-- More formats
-
-### Long Term
-
-- Distributed indexing
-- Real-time updates
-"#,
- ),
- (
- "credits.md",
- r#"# Credits
-
-## Contributors
-
-Thanks to all contributors!
-
-## Libraries
-
-Built with Rust and many open-source libraries.
-"#,
- ),
- (
- "index.md",
- r#"# Index
-
-## Quick Links
-
-- [Introduction](intro.md)
-- [API Reference](api.md)
-- [User Guide](guide.md)
-
-## Search
-
-Use the search functionality to find specific content.
-"#,
- ),
- (
- "search.md",
- r#"# Search
-
-## Search Functionality
-
-### Basic Search
-
-```rust
-let results = engine.query(&doc_id, "search term").await?;
-```
-
-### Advanced Search
-
-Use sessions for cross-document search.
-"#,
- ),
- (
- "export.md",
- r#"# Export
-
-## Exporting Data
-
-### JSON Export
-
-```rust
-let json = tree.to_structure_json();
-```
-
-### Custom Formats
-
-Implement custom exporters as needed.
-"#,
- ),
- (
- "import.md",
- r#"# Import
-
-## Importing Data
-
-### From Files
-
-```rust
-let doc_id = engine.index("./document.md").await?;
-```
-
-### From Memory
-
-Use the content directly with parsers.
-"#,
- ),
- (
- "validation.md",
- r#"# Validation
-
-## Input Validation
-
-### Document Paths
-
-Must exist and be readable.
-
-### Configuration
-
-Validated on load with helpful errors.
-
-### Queries
-
-Sanitized before processing.
-"#,
- ),
- (
- "formatting.md",
- r#"# Formatting
-
-## Content Formatting
-
-### Markdown
-
-Standard CommonMark with extensions.
-
-### Code Blocks
-
-Syntax highlighting support.
-
-### Tables
-
-Basic table parsing.
-"#,
- ),
- (
- "localization.md",
- r#"# Localization
-
-## Internationalization
-
-Currently English-only.
-
-## Future Support
-
-Planned i18n support for:
-- Error messages
-- UI strings
-- Documentation
-"#,
- ),
- (
- "accessibility.md",
- r#"# Accessibility
-
-## Accessibility
-
-### Documentation
-
-Clear and comprehensive docs.
-
-### API Design
-
-Consistent and intuitive naming.
-
-### Error Messages
-
-Helpful and actionable.
-"#,
- ),
- (
- "glossary.md",
- r#"# Glossary
-
-## Terms
-
-- **Document Tree**: Hierarchical structure
-- **Session**: Multi-document context
-- **Workspace**: Document storage
-- **Retrieval**: Content search
-"#,
- ),
- (
- "appendix.md",
- r#"# Appendix
-
-## Additional Resources
-
-- [GitHub Repository](https://github.com)
-- [Documentation Site](https://docs.vectorless.dev)
-- [Community Discord](https://discord.gg)
-"#,
- ),
- (
- "summary.md",
- r#"# Summary
-
-## Overview
-
-This documentation covers all aspects of vectorless.
-
-## Next Steps
-
-- Try the examples
-- Join the community
-- Contribute!
-"#,
- ),
- (
- "conclusion.md",
- r#"# Conclusion
-
-## Thank You
-
-Thanks for using vectorless!
-
-## Feedback
-
-We'd love to hear from you. Open an issue on GitHub.
-"#,
- ),
- (
- "revision.md",
- r#"# Revision History
-
-## Document Versions
-
-| Version | Date | Changes |
-|---------|------------|---------------------------|
-| 1.0 | 2026-01-01 | Initial version |
-| 1.1 | 2026-02-01 | Session support |
-"#,
- ),
- (
- "feedback.md",
- r#"# Feedback
-
-## Providing Feedback
-
-We value your input!
-
-### Channels
-
-- GitHub Issues
-- Discord Community
-- Email Support
-
-### What to Share
-
-- Bug reports
-- Feature requests
-- Documentation improvements
-"#,
- ),
- (
- "support.md",
- r#"# Support
-
-## Getting Help
-
-### Documentation
-
-Start with the user guide.
-
-### Community
-
-Join our Discord for discussions.
-
-### Enterprise
-
-Contact us for enterprise support.
-"#,
- ),
- (
- "updates.md",
- r#"# Updates
-
-## Staying Updated
-
-### Version Updates
-
-Check the changelog for updates.
-
-### Security Updates
-
-Apply security patches promptly.
-
-### Deprecations
-
-Watch for deprecation notices.
-"#,
- ),
- (
- "resources.md",
- r#"# Resources
-
-## External Resources
-
-### Official
-
-- Documentation: docs.vectorless.dev
-- GitHub: github.com/vectorless
-- Discord: discord.gg/vectorless
-
-### Community
-
-- Blog posts
-- Tutorial videos
-- Example projects
-"#,
- ),
- (
- "contact.md",
- r#"# Contact
-
-## Contact Information
-
-### General Inquiries
-
-Email: hello@vectorless.dev
-
-### Security Issues
-
-Email: security@vectorless.dev
-
-### Enterprise Sales
-
-Email: enterprise@vectorless.dev
-"#,
- ),
- (
- "privacy.md",
- r#"# Privacy Policy
-
-## Data Handling
-
-Vectorless processes documents locally.
-
-## No Tracking
-
-We don't track usage or content.
-
-## API Keys
-
-Stored securely in configuration files.
-"#,
- ),
- (
- "terms.md",
- r#"# Terms of Service
-
-## Usage Terms
-
-By using vectorless, you agree to:
-
-- Use responsibly
-- Follow applicable laws
-- Respect rate limits
-
-## Changes
-
-Terms may be updated. Check for revisions.
-"#,
- ),
- (
- "legal.md",
- r#"# Legal
-
-## Licensing
-
-Apache License 2.0
-
-## Copyright
-
-Copyright 2026 vectorless developers
-
-## Trademarks
-
-Vectorless is a trademark.
-"#,
- ),
- (
- "versioning.md",
- r#"# Versioning
-
-## Semantic Versioning
-
-We follow semver:
-
-- MAJOR: Breaking changes
-- MINOR: New features
-- PATCH: Bug fixes
-
-## Current Version
-
-0.1.10
-"#,
- ),
- (
- "compatibility.md",
- r#"# Compatibility
-
-## Supported Versions
-
-- Rust 1.70+
-- Tokio 1.x
-
-## Platform Support
-
-- Linux
-- macOS
-- Windows
-
-## Breaking Changes
-
-Documented in changelog.
-"#,
- ),
- (
- "installation.md",
- r#"# Installation
-
-## Requirements
-
-- Rust 1.70+
-- Tokio runtime
-
-## Install
-
-```bash
-cargo install vectorless
-```
-
-## Verify
-
-```bash
-vectorless --version
-```
-"#,
- ),
- (
- "quickstart.md",
- r#"# Quick Start
-
-## 5-Minute Setup
-
-1. Install vectorless
-2. Create a client
-3. Index a document
-4. Query!
-
-```rust
-let client = Engine::builder().build()?;
-let doc_id = client.index("./doc.md").await?;
-let result = client.query(&doc_id, "What is this?").await?;
-```
-"#,
- ),
- (
- "tutorial.md",
- r#"# Tutorial
-
-## Introduction
-
-This tutorial covers basic usage.
-
-## Step 1: Setup
-
-Create a client with workspace.
-
-## Step 2: Index
-
-Index your first document.
-
-## Step 3: Query
-
-Ask questions about your document.
-
-## Step 4: Next
-
-Explore advanced features.
-"#,
- ),
- (
- "examples_overview.md",
- r#"# Examples Overview
-
-## Available Examples
-
-| Example | Description |
-|-----------------|--------------------------------|
-| basic.rs | Basic usage |
-| session.rs | Multi-document operations |
-| events.rs | Event callbacks |
-| batch.rs | Batch processing |
-
-## Running Examples
-
-```bash
-cargo run --example
-```
-"#,
- ),
- (
- "configuration.md",
- r#"# Configuration
-
-## Configuration File
-
-Use `config.toml` for settings:
-
-```toml
-[storage]
-workspace_dir = "./workspace"
-
-[retrieval]
-top_k = 5
-max_tokens = 4000
-```
-
-## Environment Variables
-
-- `OPENAI_API_KEY`: LLM API key
-"#,
- ),
- (
- "optimization.md",
- r#"# Optimization
-
-## Performance Tips
-
-- Use sessions for caching
-- Batch document indexing
-- Configure appropriate token limits
-
-## Memory Management
-
-Documents are cached in sessions.
-
-## Concurrency
-
-Use `buffer_unordered` for parallel indexing.
-"#,
- ),
- (
- "errors.md",
- r#"# Error Handling
-
-## Error Types
-
-- `ConfigError`: Configuration issues
-- `ParseError`: Document parsing failures
-- `RetrievalError`: Query failures
-
-## Handling Errors
-
-```rust
-match result {
- Ok(response) => { /* success */ },
- Err(Error::Parse(msg)) => { /* handle parse error */ },
- Err(e) => { /* other error */ },
-}
-```
-"#,
- ),
- (
- "logging.md",
- r#"# Logging
-
-## Log Levels
-
-- ERROR: Serious issues
-- WARN: Potential issues
-- INFO: General information
-- DEBUG: Detailed information
-- TRACE: Very detailed
-
-## Enabling Logs
-
-```bash
-RUST_LOG=debug cargo run
-```
-"#,
- ),
- (
- "metrics.md",
- r#"# Metrics
-
-## Available Metrics
-
-- Query count
-- Cache hit rate
-- Average query time
-
-## Accessing Metrics
-
-```rust
-let stats = session.stats();
-println!("Cache hit rate: {:.1}%", stats.cache_hit_rate() * 100.0);
-```
-"#,
- ),
- (
- "health.md",
- r#"# Health Checks
-
-## Workspace Health
-
-Check workspace integrity:
-
-```rust
-let docs = engine.list_documents();
-println!("{} documents indexed", docs.len());
-```
-
-## Session Health
-
-Monitor session statistics regularly.
-"#,
- ),
- (
- "backup.md",
- r#"# Backup
-
-## Backing Up
-
-Copy the workspace directory:
-
-```bash
-cp -r ./workspace ./workspace_backup
-```
-
-## Restoration
-
-Restore by copying back:
-
-```bash
-cp -r ./workspace_backup ./workspace
-```
-"#,
- ),
- (
- "recovery.md",
- r#"# Recovery
-
-## Corrupted Documents
-
-Remove and re-index:
-
-```rust
-engine.remove(&doc_id)?;
-engine.index(&path).await?;
-```
-
-## Session Recovery
-
-Create a new session if issues occur.
-"#,
- ),
- (
- "monitoring.md",
- r#"# Monitoring
-
-## Production Monitoring
-
-Use events for real-time monitoring:
-
-```rust
-let events = EventEmitter::new()
- .on_query(|e| {
- // Log to monitoring system
- });
-```
-
-## Alerts
-
-Set up alerts for error rates.
-"#,
- ),
- (
- "scaling.md",
- r#"# Scaling
-
-## Horizontal Scaling
-
-Run multiple instances with shared storage.
-
-## Vertical Scaling
-
-Increase resources for single instance.
-
-## Considerations
-
-- Storage backend
-- Cache coordination
-- Rate limiting
-"#,
- ),
- (
- "security_config.md",
- r#"# Security Configuration
-
-## API Keys
-
-Store securely:
-
-```toml
-[summary]
-api_key = "${OPENAI_API_KEY}"
-```
-
-## Network Security
-
-Use HTTPS for all API calls.
-
-## Access Control
-
-Implement authentication for production.
-"#,
- ),
- ];
-
- for (name, content) in &documents {
- let path = temp_dir.path().join(name);
- std::fs::write(&path, content)?;
- }
-
- println!(" ✓ Created {} sample documents\n", documents.len());
-
- // 3. Batch indexing with progress
- println!("Step 3: Batch indexing...");
- let start = std::time::Instant::now();
- let mut doc_ids = Vec::new();
-
- for (name, _) in &documents {
- let path = temp_dir.path().join(name);
- match session.index(IndexContext::from_path(&path)).await {
- Ok(doc_id) => {
- doc_ids.push(doc_id);
- }
- Err(e) => {
- eprintln!(" ✗ Failed to index {}: {}", name, e);
- }
- }
- }
-
- let elapsed = start.elapsed();
- println!(" ✓ Indexed {} documents in {:?}", doc_ids.len(), elapsed);
- println!(
- " - Rate: {:.1} docs/sec",
- doc_ids.len() as f64 / elapsed.as_secs_f64()
- );
- println!();
-
- // 4. Show session stats
- println!("Step 4: Session statistics:");
- let stats = session.stats();
- println!(
- " - Documents in session: {}",
- session.list_documents().len()
- );
- println!(" - Queries: {}", stats.query_count.get());
- println!();
-
- // 5. Batch query with progress
- println!("Step 5: Batch querying...");
- let queries = vec![
- "What is vectorless?",
- "How to index?",
- "Configuration options",
- "API methods",
- "Performance tips",
- "Error handling",
- "Logging setup",
- "Security considerations",
- "Scaling options",
- "Getting help",
- ];
-
- let start = std::time::Instant::now();
- let mut success_count = 0;
-
- for query in &queries {
- match session.query_all(query).await {
- Ok(results) => {
- if !results.is_empty() {
- success_count += 1;
- }
- }
- Err(e) => {
- eprintln!(" ✗ Query failed: {}", e);
- }
- }
- }
-
- let elapsed = start.elapsed();
- println!(" ✓ Completed {} queries in {:?}", queries.len(), elapsed);
- println!(
- " - Success rate: {:.0}%",
- (success_count as f64 / queries.len() as f64) * 100.0
- );
- println!(
- " - Rate: {:.1} queries/sec",
- queries.len() as f64 / elapsed.as_secs_f64()
- );
- println!();
-
- // 6. Final statistics
- println!("Step 6: Final statistics:");
- let stats = session.stats();
- println!(" - Total documents: {}", session.list_documents().len());
- println!(" - Total queries: {}", stats.query_count.get());
- println!(" - Cache hits: {}", stats.cache_hits.get());
- println!(" - Cache misses: {}", stats.cache_misses.get());
- println!(" - Cache hit rate: {:.1}%", stats.cache_hit_rate() * 100.0);
- if let Some(avg_time) = stats.avg_query_time() {
- println!(" - Avg query time: {:?}", avg_time);
- }
- println!(" - Session age: {:?}", session.age());
- println!();
-
- // 7. Cleanup
- println!("Step 7: Cleanup...");
- for doc_id in &doc_ids {
- engine.remove(doc_id).await?;
- }
- println!(" ✓ Removed {} documents\n", doc_ids.len());
-
- println!("=== Example Complete ===");
- Ok(())
-}
diff --git a/examples/rust/content_aggregation.rs b/examples/rust/content_aggregation.rs
deleted file mode 100644
index 8437ccd0..00000000
--- a/examples/rust/content_aggregation.rs
+++ /dev/null
@@ -1,196 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Content Aggregation Accuracy Example
-//!
-//! This example demonstrates the content aggregation module's ability to:
-//! 1. Score content relevance
-//! 2. Allocate token budget
-//! 3. Build structured output
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example content_aggregation
-//! ```
-
-use indextree::Arena;
-use vectorless::document::NodeId;
-use vectorless::retrieval::content::{
- AllocationStrategy, BudgetAllocator, ContentAggregator, ContentAggregatorConfig, ContentChunk,
- OutputFormat, RelevanceScorer, ScoringContext, ScoringStrategyConfig, StructureBuilder,
-};
-
-fn make_node_id() -> NodeId {
- let mut arena = Arena::new();
- let node = vectorless::document::TreeNode {
- title: "Test".to_string(),
- structure: String::new(),
- content: String::new(),
- summary: String::new(),
- depth: 0,
- start_index: 0,
- end_index: 0,
- start_page: None,
- end_page: None,
- node_id: None,
- physical_index: None,
- token_count: None,
- references: Vec::new(),
- };
- NodeId(arena.new_node(node))
-}
-
-fn main() {
- println!("=== Content Aggregation Accuracy Demo ===\n");
-
- // 1. Demonstrate Relevance Scoring
- println!("1. Relevance Scoring Demo");
- println!("---------------------------");
-
- let query = "What is the architecture of vectorless?";
- let scorer = RelevanceScorer::new(query, ScoringStrategyConfig::KeywordWithBM25);
-
- let chunks = vec![
- ContentChunk::new(
- make_node_id(),
- "Architecture Overview".to_string(),
- "Vectorless uses a tree-based architecture for document navigation. The system consists of multiple stages: parsing, indexing, and retrieval.".to_string(),
- 0,
- ),
- ContentChunk::new(
- make_node_id(),
- "Installation Guide".to_string(),
- "To install vectorless, add it to your Cargo.toml file. Then run cargo build to compile.".to_string(),
- 1,
- ),
- ContentChunk::new(
- make_node_id(),
- "Core Components".to_string(),
- "The architecture includes Pilot for navigation, Judge for sufficiency checking, and multiple search algorithms like beam search and greedy search.".to_string(),
- 1,
- ),
- ];
-
- let ctx = ScoringContext::default();
-
- println!("Query: \"{}\"", query);
- println!("\nScored chunks:");
- for chunk in &chunks {
- let relevance = scorer.score_chunk(chunk, &ctx);
- println!(
- " - '{}' (depth {}): score {:.3}",
- chunk.title, chunk.depth, relevance.score
- );
- println!(
- " Components: keyword={:.2}, bm25={:.2}, depth_penalty={:.2}, density={:.2}",
- relevance.components.keyword_score,
- relevance.components.bm25_score,
- relevance.components.depth_penalty,
- relevance.components.density_score,
- );
- }
-
- // 2. Demonstrate Budget Allocation
- println!("\n\n2. Budget Allocation Demo");
- println!("---------------------------");
-
- let scored: Vec<_> = chunks
- .iter()
- .map(|chunk| scorer.score_chunk(chunk, &ctx))
- .collect();
-
- let strategies = vec![
- ("Greedy", AllocationStrategy::Greedy),
- (
- "Hierarchical (20%/level)",
- AllocationStrategy::Hierarchical { min_per_level: 0.2 },
- ),
- ];
-
- for (name, strategy) in strategies {
- let allocator = BudgetAllocator::new(200).with_strategy(strategy);
-
- let result = allocator.allocate(scored.clone(), 2);
-
- println!("\n{} Strategy:", name);
- println!(" Tokens used: {}/{}", result.tokens_used, 200);
- println!(" Items selected: {}", result.selected.len());
- println!(" Avg score: {:.3}", result.stats.avg_score);
-
- for content in &result.selected {
- let trunc = if content.is_truncated() {
- " [truncated]"
- } else {
- ""
- };
- println!(
- " - '{}' ({} tokens, score {:.2}){}",
- content.title, content.tokens, content.score, trunc
- );
- }
- }
-
- // 3. Demonstrate Structure Building
- println!("\n\n3. Structure Building Demo");
- println!("---------------------------");
-
- let formats = vec![
- ("Markdown", OutputFormat::Markdown),
- ("Flat", OutputFormat::Flat),
- ];
-
- let allocator = BudgetAllocator::new(500).with_strategy(AllocationStrategy::Greedy);
- let result = allocator.allocate(scored.clone(), 2);
-
- for (name, format) in formats {
- let builder = StructureBuilder::new(format);
- let tree = vectorless::document::DocumentTree::new("Test", "");
- let structured = builder.build(result.selected.clone(), &tree);
-
- println!(
- "\n{} Output ({} chars, {} tokens):",
- name,
- structured.content.len(),
- structured.metadata.total_tokens
- );
- let preview = if structured.content.len() > 300 {
- format!("{}...", &structured.content[..300])
- } else {
- structured.content.clone()
- };
- println!("{}", preview.lines().take(8).collect::<Vec<_>>().join("\n"));
- }
-
- // 4. Demonstrate Full Aggregation Pipeline
- println!("\n\n4. Full Aggregation Pipeline Demo");
- println!("-----------------------------------");
-
- let configs = vec![
- ("Default (4000 tokens)", ContentAggregatorConfig::default()),
- (
- "Conservative (1000 tokens)",
- ContentAggregatorConfig::new()
- .with_token_budget(1000)
- .with_min_relevance(0.3),
- ),
- (
- "High Precision (2000 tokens, 0.5 threshold)",
- ContentAggregatorConfig::new()
- .with_token_budget(2000)
- .with_min_relevance(0.5),
- ),
- ];
-
- for (name, config) in configs {
- println!("\n{} Config:", name);
- println!(" Token budget: {}", config.token_budget);
- println!(" Min relevance: {:.1}", config.min_relevance_score);
-
- let aggregator = ContentAggregator::new(config);
- // Note: Full aggregation requires a DocumentTree with actual content
- let _ = aggregator; // Suppress unused warning
- }
-
- println!("\n=== Demo Complete ===");
-}
diff --git a/examples/rust/custom_config.rs b/examples/rust/custom_config.rs
index 12eaedc4..b916143b 100644
--- a/examples/rust/custom_config.rs
+++ b/examples/rust/custom_config.rs
@@ -12,7 +12,7 @@
//! cargo run --example custom_config
//! ```
-use vectorless::{Engine, IndexContext};
+use vectorless::{EngineBuilder, IndexContext, QueryContext};
#[tokio::main]
async fn main() -> vectorless::Result<()> {
@@ -32,9 +32,10 @@ async fn main() -> vectorless::Result<()> {
// ============================================================
// Example: Use DeepSeek API
- let client = Engine::builder()
+ let client = EngineBuilder::new()
.with_workspace("./workspace")
- .with_model("deepseek-chat", Some("sk-your-deepseek-key".to_string()))
+ .with_model("deepseek-chat")
+ .with_key("sk-your-deepseek-key")
.with_endpoint("https://api.deepseek.com/v1")
.build()
.await
@@ -43,11 +44,14 @@ async fn main() -> vectorless::Result<()> {
println!("✓ Client created with custom settings\n");
// Index a document
- let doc_id = client.index(IndexContext::from_path("./README.md")).await?;
+ let index_result = client.index(IndexContext::from_path("./README.md")).await?;
+ let doc_id = index_result.doc_id().unwrap().to_string();
println!("✓ Indexed: {}\n", doc_id);
// Query
- let result = client.query(&doc_id, "What is Vectorless?").await?;
+ let result = client
+ .query(QueryContext::new("What is Vectorless?").with_doc_id(&doc_id))
+ .await?;
println!("Query: What is Vectorless?");
println!("Score: {:.2}", result.score);
if !result.content.is_empty() {
@@ -64,25 +68,27 @@ async fn main() -> vectorless::Result<()> {
// ============================================================
// Azure OpenAI:
- // let client = Engine::builder()
+ // let client = EngineBuilder::new()
// .with_workspace("./workspace")
- // .with_model("gpt-4o", Some("your-azure-key".to_string()))
+ // .with_model("gpt-4o")
+ // .with_key("your-azure-key")
// .with_endpoint("https://your-resource.openai.azure.com/openai/deployments/your-deployment")
// .build()
// .await?;
// Local LLM (e.g., Ollama with OpenAI-compatible API):
- // let client = Engine::builder()
+ // let client = EngineBuilder::new()
// .with_workspace("./workspace")
- // .with_model("llama3", None) // No API key needed
+ // .with_model("llama3")
// .with_endpoint("http://localhost:11434/v1")
// .build()
// .await?;
// Anthropic Claude (via OpenAI-compatible proxy):
- // let client = Engine::builder()
+ // let client = EngineBuilder::new()
// .with_workspace("./workspace")
- // .with_model("claude-3-5-sonnet-20241022", Some("sk-ant-...".to_string()))
+ // .with_model("claude-3-5-sonnet-20241022")
+ // .with_key("sk-ant-...")
// .with_endpoint("https://api.anthropic.com/v1")
// .build()
// .await?;
diff --git a/examples/rust/custom_pilot.rs b/examples/rust/custom_pilot.rs
deleted file mode 100644
index 15f4542e..00000000
--- a/examples/rust/custom_pilot.rs
+++ /dev/null
@@ -1,286 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Custom Pilot implementation example.
-//!
-//! This example demonstrates how to implement a custom Pilot
-//! that provides navigation guidance during retrieval.
-//!
-//! # What you'll learn:
-//! - How to implement the Pilot trait
-//! - When to intervene (START, FORK, BACKTRACK, EVALUATE)
-//! - How to provide ranked candidates
-//! - How to integrate custom Pilot with the retrieval pipeline
-//!
-//! # Key concepts:
-//!
-//! ## Intervention Points
-//! - START: Before search begins - analyze query, set direction
-//! - FORK: At branch points - rank candidates, guide path selection
-//! - BACKTRACK: When search fails - suggest alternatives
-//! - EVALUATE: After content found - check sufficiency
-//!
-//! ## Score Merging
-//! ```text
-//! final_score = alpha * algorithm_score + beta * llm_score
-//! ```
-
-use async_trait::async_trait;
-use std::collections::HashSet;
-use vectorless::document::{DocumentTree, NodeId};
-use vectorless::retrieval::pilot::{
- InterventionPoint, Pilot, PilotConfig, PilotDecision, RankedCandidate, SearchDirection,
- SearchState,
-};
-
-/// A custom Pilot that uses simple keyword matching for guidance.
-///
-/// This demonstrates the Pilot trait implementation without requiring
-/// an actual LLM client.
-pub struct KeywordPilot {
- config: PilotConfig,
-}
-
-impl KeywordPilot {
- /// Create a new KeywordPilot.
- pub fn new() -> Self {
- Self {
- config: PilotConfig::default(),
- }
- }
-
- /// Score a node title based on keyword overlap with the query.
- fn score_by_keywords(&self, query: &str, title: &str) -> f32 {
- let query_lower = query.to_lowercase();
- let title_lower = title.to_lowercase();
-
- let query_words: HashSet<&str> = query_lower
- .split_whitespace()
- .filter(|w| w.len() > 2)
- .collect();
-
- let title_words: HashSet<&str> = title_lower
- .split_whitespace()
- .filter(|w| w.len() > 2)
- .collect();
-
- if query_words.is_empty() || title_words.is_empty() {
- return 0.0;
- }
-
- let overlap = query_words.intersection(&title_words).count();
- overlap as f32 / query_words.len().max(1) as f32
- }
-}
-
-impl Default for KeywordPilot {
- fn default() -> Self {
- Self::new()
- }
-}
-
-#[async_trait]
-impl Pilot for KeywordPilot {
- fn name(&self) -> &str {
- "keyword_pilot"
- }
-
- fn should_intervene(&self, state: &SearchState<'_>) -> bool {
- // Intervene at fork points with multiple candidates
- if state.candidates.len() > 2 {
- return true;
- }
-
- // Intervene when best score is low
- if state.best_score < 0.3 {
- return true;
- }
-
- // Intervene during backtracking
- if state.is_backtracking {
- return true;
- }
-
- false
- }
-
- async fn decide(&self, state: &SearchState<'_>) -> PilotDecision {
- // Rank candidates by keyword overlap
- let mut ranked: Vec<RankedCandidate> = state
- .candidates
- .iter()
- .filter_map(|&node_id| {
- state.tree.get(node_id).map(|node| {
- let score = self.score_by_keywords(state.query, &node.title);
- RankedCandidate::new(node_id, score)
- })
- })
- .collect();
-
- ranked.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
-
- // Determine direction
- let direction = if ranked.is_empty() {
- SearchDirection::backtrack("No candidates available", vec![])
- } else if ranked[0].score > 0.5 {
- SearchDirection::go_deeper(format!("Strong match: {:.2}", ranked[0].score))
- } else if ranked[0].score > 0.2 {
- SearchDirection::go_deeper(format!("Moderate match: {:.2}", ranked[0].score))
- } else {
- SearchDirection::backtrack("No strong matches found", vec![])
- };
-
- let confidence = ranked.first().map(|c| c.score).unwrap_or(0.0);
-
- PilotDecision {
- ranked_candidates: ranked,
- direction,
- confidence,
- reasoning: "Keyword-based decision".to_string(),
- intervention_point: InterventionPoint::Fork,
- }
- }
-
- async fn guide_start(&self, tree: &DocumentTree, query: &str) -> Option<PilotDecision> {
- // Score root's children
- let children = tree.children(tree.root());
- let mut ranked: Vec<RankedCandidate> = children
- .iter()
- .filter_map(|&node_id| {
- tree.get(node_id).map(|node| {
- let score = self.score_by_keywords(query, &node.title);
- RankedCandidate::new(node_id, score)
- })
- })
- .collect();
-
- ranked.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
-
- let confidence = ranked.first().map(|c| c.score).unwrap_or(0.0);
-
- Some(PilotDecision {
- ranked_candidates: ranked,
- direction: SearchDirection::go_deeper("Starting search"),
- confidence,
- reasoning: "Keyword-based start guidance".to_string(),
- intervention_point: InterventionPoint::Start,
- })
- }
-
- async fn guide_backtrack(&self, state: &SearchState<'_>) -> Option<PilotDecision> {
- // Find unvisited alternatives
- let mut alternatives = Vec::new();
- for node_id in state.tree.children(state.tree.root()) {
- if !state.visited.contains(&node_id) {
- alternatives.push(node_id);
- }
- }
-
- let ranked: Vec<RankedCandidate> = alternatives
- .iter()
- .take(5)
- .map(|&node_id| RankedCandidate::new(node_id, 0.5))
- .collect();
-
- Some(PilotDecision {
- ranked_candidates: ranked,
- direction: SearchDirection::backtrack("Backtrack guidance", alternatives),
- confidence: 0.5,
- reasoning: "Suggesting alternative branches".to_string(),
- intervention_point: InterventionPoint::Backtrack,
- })
- }
-
- fn config(&self) -> &PilotConfig {
- &self.config
- }
-
- fn is_active(&self) -> bool {
- true
- }
-
- fn reset(&self) {
- // No state to reset
- }
-}
-
-fn main() {
- println!("=== Custom Pilot Example ===\n");
-
- // 1. Create the custom pilot
- let pilot = KeywordPilot::new();
- println!("Created KeywordPilot\n");
-
- // 2. Create a sample document tree
- let tree = create_sample_tree();
- println!("Created sample tree with {} nodes\n", tree.node_count());
-
- // 3. Create search state for demonstration
- let query = "What is the architecture?";
- let candidates: Vec<NodeId> = tree.children(tree.root());
- let visited: HashSet<NodeId> = HashSet::new();
- let state = SearchState::new(&tree, query, &[], &candidates, &visited);
-
- println!("Query: \"{}\"", query);
- println!("Candidates: {}", candidates.len());
- println!("Should intervene: {}\n", pilot.should_intervene(&state));
-
- // 4. Demonstrate keyword scoring
- println!("Keyword scoring:");
- for node_id in tree.children(tree.root()) {
- if let Some(node) = tree.get(node_id) {
- let score = pilot.score_by_keywords(query, &node.title);
- println!(" - '{}': {:.2}", node.title, score);
- }
- }
-
- // 5. Show how to integrate with retrieval
- println!("\n--- Integration Example ---\n");
- println!("To use with Engine:");
- println!("```rust");
- println!("use std::sync::Arc;");
- println!("use vectorless::Engine;");
- println!();
- println!("let pilot = Arc::new(KeywordPilot::new());");
- println!("let engine = Engine::builder()");
- println!(" .with_workspace(\"./workspace\")");
- println!(" .with_pilot(pilot)");
- println!(" .build()");
- println!(" .await?;");
- println!("```");
-
- println!("\n=== Done ===");
-}
-
-fn create_sample_tree() -> DocumentTree {
- let mut tree = DocumentTree::new(
- "Vectorless Documentation",
- "A hierarchical document intelligence engine written in Rust.",
- );
-
- let arch = tree.add_child(
- tree.root(),
- "Architecture",
- "The system consists of three main components.",
- );
- tree.add_child(
- arch,
- "Index Pipeline",
- "Processes documents into a tree structure.",
- );
- tree.add_child(
- arch,
- "Retrieval Pipeline",
- "Finds relevant content using multi-stage processing.",
- );
-
- let usage = tree.add_child(tree.root(), "Usage", "How to use the vectorless library.");
- tree.add_child(usage, "Basic Example", "Simple usage with default configuration.");
- tree.add_child(
- usage,
- "Advanced Example",
- "Custom pipeline configuration with LLM.",
- );
-
- tree
-}
diff --git a/examples/rust/document_graph.rs b/examples/rust/document_graph.rs
deleted file mode 100644
index d765e3b5..00000000
--- a/examples/rust/document_graph.rs
+++ /dev/null
@@ -1,290 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Document Graph example.
-//!
-//! Demonstrates how to:
-//! 1. Build a document graph from multiple documents
-//! 2. Explore cross-document relationships (shared keywords, edges)
-//! 3. Use graph-aware retrieval with different merge strategies
-//!
-//! # What is a Document Graph?
-//!
-//! A workspace-scoped weighted graph connecting documents by shared concepts.
-//! Nodes = documents, Edges = relationships (shared keywords with weights).
-//!
-//! # Key outputs:
-//! - Document nodes with top keywords
-//! - Bidirectional edges with Jaccard similarity and shared keyword evidence
-//! - Keyword inverted index for cross-document lookup
-//! - Graph-boosted retrieval ranking
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example document_graph
-//! ```
-
-use std::collections::HashMap;
-
-use vectorless::document::{
- DocumentGraph, DocumentGraphConfig, DocumentGraphNode, WeightedKeyword,
-};
-use vectorless::index::graph_builder::DocumentGraphBuilder;
-
-#[tokio::main]
-async fn main() {
- println!("=== Document Graph Example ===\n");
-
- // -------------------------------------------------------
- // Part 1: Build the graph manually (low-level API)
- // -------------------------------------------------------
- println!("--- Part 1: Build Graph Manually ---\n");
- demo_manual_graph();
-
- // -------------------------------------------------------
- // Part 2: Build the graph with DocumentGraphBuilder
- // -------------------------------------------------------
- println!("\n--- Part 2: Build Graph with Builder ---\n");
- let graph = demo_builder();
-
- // -------------------------------------------------------
- // Part 3: Explore the graph
- // -------------------------------------------------------
- println!("\n--- Part 3: Explore the Graph ---\n");
- demo_explore(&graph);
-
- // -------------------------------------------------------
- // Part 4: Keyword-based document lookup
- // -------------------------------------------------------
- println!("\n--- Part 4: Keyword Lookup ---\n");
- demo_keyword_lookup(&graph);
-
- // -------------------------------------------------------
- // Part 5: Show graph-boosted retrieval concept
- // -------------------------------------------------------
- println!("\n--- Part 5: Graph-Boosted Retrieval ---\n");
- demo_graph_boosted_retrieval(&graph);
-
- println!("\n=== Done ===");
-}
-
-/// Manually build a small graph to show the data model.
-fn demo_manual_graph() {
- let mut graph = DocumentGraph::new();
-
- // Add document nodes
- graph.add_node(DocumentGraphNode {
- doc_id: "rust-book".to_string(),
- title: "The Rust Programming Language".to_string(),
- format: "md".to_string(),
- top_keywords: vec![
- WeightedKeyword { keyword: "ownership".to_string(), weight: 0.95 },
- WeightedKeyword { keyword: "borrowing".to_string(), weight: 0.90 },
- WeightedKeyword { keyword: "lifetimes".to_string(), weight: 0.80 },
- WeightedKeyword { keyword: "traits".to_string(), weight: 0.70 },
- ],
- node_count: 42,
- });
-
- graph.add_node(DocumentGraphNode {
- doc_id: "rust-async".to_string(),
- title: "Async Programming in Rust".to_string(),
- format: "md".to_string(),
- top_keywords: vec![
- WeightedKeyword { keyword: "async".to_string(), weight: 0.95 },
- WeightedKeyword { keyword: "tokio".to_string(), weight: 0.85 },
- WeightedKeyword { keyword: "lifetimes".to_string(), weight: 0.60 },
- WeightedKeyword { keyword: "traits".to_string(), weight: 0.50 },
- ],
- node_count: 28,
- });
-
- println!("Nodes: {}", graph.node_count());
- for doc_id in graph.doc_ids() {
- let node = graph.get_node(doc_id).unwrap();
- println!(" {} ({}): {} keywords, {} nodes",
- node.doc_id, node.title, node.top_keywords.len(), node.node_count);
- }
-}
-
-/// Build a graph from multiple documents using DocumentGraphBuilder.
-fn demo_builder() -> DocumentGraph {
- let config = DocumentGraphConfig {
- enabled: true,
- min_keyword_jaccard: 0.05,
- min_shared_keywords: 2,
- max_keywords_per_doc: 50,
- max_edges_per_node: 20,
- retrieval_boost_factor: 0.15,
- };
-
- let mut builder = DocumentGraphBuilder::new(config);
-
- // Document 1: Rust Language Guide
- builder.add_document(
- "rust-guide",
- "Rust Language Guide",
- "md",
- 35,
- keywords(&[
- ("ownership", 0.95), ("borrowing", 0.90), ("lifetimes", 0.85),
- ("traits", 0.80), ("generics", 0.75), ("error-handling", 0.70),
- ("pattern-matching", 0.65), ("closures", 0.60),
- ]),
- );
-
- // Document 2: Async Rust (overlaps on lifetimes, traits, closures)
- builder.add_document(
- "async-guide",
- "Async Rust Guide",
- "md",
- 28,
- keywords(&[
- ("async", 0.95), ("tokio", 0.90), ("futures", 0.85),
- ("lifetimes", 0.60), ("traits", 0.55), ("closures", 0.50),
- ("pinning", 0.80), ("waker", 0.75),
- ]),
- );
-
- // Document 3: Rust Testing (overlaps on traits, closures, error-handling)
- builder.add_document(
- "testing-guide",
- "Rust Testing Guide",
- "md",
- 22,
- keywords(&[
- ("testing", 0.95), ("assertions", 0.90), ("mocking", 0.85),
- ("traits", 0.60), ("closures", 0.55), ("error-handling", 0.50),
- ("benchmarks", 0.80), ("coverage", 0.75),
- ]),
- );
-
- // Document 4: Unrelated document (cooking — no overlap)
- builder.add_document(
- "cooking",
- "Italian Cooking",
- "md",
- 15,
- keywords(&[
- ("pasta", 0.95), ("sauce", 0.90), ("olive-oil", 0.85),
- ("garlic", 0.80), ("basil", 0.75), ("tomato", 0.70),
- ]),
- );
-
- let graph = builder.build();
-
- println!("Graph built:");
- println!(" Documents: {}", graph.node_count());
- println!(" Edges: {}", graph.edge_count());
-
- graph
-}
-
-/// Explore nodes, edges, and relationship evidence.
-fn demo_explore(graph: &DocumentGraph) {
- for doc_id in graph.doc_ids() {
- let node = graph.get_node(doc_id).unwrap();
- let neighbors = graph.get_neighbors(doc_id);
-
- println!("[{}] {} ({} nodes)", node.doc_id, node.title, node.node_count);
-
- // Show top keywords
- let top_3: Vec<String> = node.top_keywords.iter()
- .take(3)
- .map(|kw| format!("{} ({:.2})", kw.keyword, kw.weight))
- .collect();
- println!(" Keywords: {}", top_3.join(", "));
-
- // Show edges to other documents
- if neighbors.is_empty() {
- println!(" Edges: (none — isolated document)");
- } else {
- println!(" Edges:");
- for edge in neighbors {
- println!(
- " -> {} [weight={:.3}, jaccard={:.3}, shared={}]",
- edge.target_doc_id,
- edge.weight,
- edge.evidence.keyword_jaccard,
- edge.evidence.shared_keyword_count,
- );
- // Show shared keywords
- let shared: Vec<String> = edge.evidence.shared_keywords.iter()
- .map(|sk| format!("{} ({:.2}/{:.2})", sk.keyword, sk.source_weight, sk.target_weight))
- .collect();
- println!(" Shared: {}", shared.join(", "));
- }
- }
- println!();
- }
-}
-
-/// Look up documents by keyword using the inverted index.
-fn demo_keyword_lookup(graph: &DocumentGraph) {
- let queries = ["traits", "closures", "async", "pasta", "nonexistent"];
-
- for kw in &queries {
- let entries = graph.find_by_keyword(kw);
- if entries.is_empty() {
- println!(" '{}': not found in any document", kw);
- } else {
- let docs: Vec<String> = entries.iter()
- .map(|e| format!("{} ({:.2})", e.doc_id, e.weight))
- .collect();
- println!(" '{}': found in {}", kw, docs.join(", "));
- }
- }
-}
-
-/// Show how graph-boosted retrieval works conceptually.
-fn demo_graph_boosted_retrieval(graph: &DocumentGraph) {
- println!("Scenario: User queries 'traits and closures'");
- println!();
-
- // Step 1: Simulate per-document scores
- let results = vec![
- ("rust-guide".to_string(), 0.85),
- ("async-guide".to_string(), 0.60),
- ("testing-guide".to_string(), 0.55),
- ("cooking".to_string(), 0.10),
- ];
-
- println!("Before graph boosting:");
- for (doc, score) in &results {
- println!(" {}: {:.3}", doc, score);
- }
-
- // Step 2: Apply graph boost — high-score docs boost their neighbors
- let boost_factor = 0.15;
- let mut boosted = results.clone();
- for (doc, base_score) in &results {
- if *base_score > 0.5 {
- for edge in graph.get_neighbors(doc) {
- for entry in boosted.iter_mut() {
- if entry.0 == edge.target_doc_id {
- let boost = boost_factor * edge.weight * base_score;
- entry.1 += boost;
- }
- }
- }
- }
- }
- boosted.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
-
- println!();
- println!("After graph boosting (boost_factor={}):", boost_factor);
- for (doc, score) in &boosted {
- let delta = score - results.iter().find(|(d, _)| d == doc).unwrap().1;
- println!(" {}: {:.3} (+{:.3})", doc, score, delta);
- }
-
- println!();
- println!("Effect: Related documents (rust-guide, async-guide, testing-guide)");
- println!(" boost each other via shared keywords, while 'cooking' stays low.");
-}
-
-// Helper to build keyword maps
-fn keywords(pairs: &[(&str, f32)]) -> HashMap<String, f32> {
- pairs.iter().map(|&(k, w)| (k.to_string(), w)).collect()
-}
diff --git a/examples/rust/events.rs b/examples/rust/events.rs
index 706454fc..7d5c99c2 100644
--- a/examples/rust/events.rs
+++ b/examples/rust/events.rs
@@ -17,7 +17,8 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
-use vectorless::client::{EngineBuilder, EventEmitter, IndexContext, IndexEvent, QueryEvent};
+use vectorless::client::{EngineBuilder, EventEmitter, IndexContext, QueryContext};
+use vectorless::client::events::{IndexEvent, QueryEvent};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
@@ -123,14 +124,15 @@ The event system uses handlers that can be attached to the engine builder.
let doc_path = temp_dir.path().join("example.md");
tokio::fs::write(&doc_path, doc_content).await?;
- let doc_id = engine.index(IndexContext::from_path(&doc_path)).await?;
+ let index_result = engine.index(IndexContext::from_path(&doc_path)).await?;
+ let doc_id = index_result.doc_id().unwrap().to_string();
println!();
// 4. Query the document (events will fire)
println!("Step 4: Querying document (watch events)...\n");
let result = engine
- .query(&doc_id, "What features are available?")
+ .query(QueryContext::new("What features are available?").with_doc_id(&doc_id))
.await?;
println!();
diff --git a/examples/rust/feedback_learning.rs b/examples/rust/feedback_learning.rs
deleted file mode 100644
index 3848e534..00000000
--- a/examples/rust/feedback_learning.rs
+++ /dev/null
@@ -1,143 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Feedback Learning example.
-//!
-//! This example demonstrates how to use the feedback learning system
-//! to improve Pilot decision quality over time.
-//!
-//! # What you'll learn:
-//! - How to create a FeedbackStore for collecting feedback
-//! - How to integrate PilotLearner with LlmPilot
-//! - How to record user feedback for decisions
-//! - How the learner automatically adjusts decisions
-//!
-//! # Key concepts:
-//!
-//! ## Feedback Flow
-//! ```text
-//! Retrieval → Decision → User Feedback → FeedbackStore
-//! ↑ ↓
-//! └──────── PilotLearner ────────┘
-//! (adjusts confidence)
-//! ```
-//!
-//! ## Learning Effect
-//! - High accuracy scenarios → Pilot confidence boosted
-//! - Low accuracy scenarios → Algorithm trusted more
-//! - Very low accuracy → Intervention skipped entirely
-
-use std::sync::Arc;
-use vectorless::llm::LlmClient;
-use vectorless::retrieval::pilot::{
- FeedbackRecord, FeedbackStore, FeedbackStoreConfig, InterventionPoint, LearnerConfig,
- PilotLearner, DecisionId, LlmPilot, PilotConfig,
-};
-
-fn main() -> Result<(), Box<dyn std::error::Error>> {
- println!("=== Feedback Learning Example ===\n");
-
- // 1. Create FeedbackStore with in-memory storage
- let store = Arc::new(FeedbackStore::in_memory());
- println!("✓ Created FeedbackStore (in-memory)");
-
- // 2. Create Learner with custom configuration
- let learner_config = LearnerConfig {
- min_samples: 5, // Need 5 samples before adjusting
- high_accuracy_threshold: 0.8, // 80%+ accuracy = boost confidence
- low_accuracy_threshold: 0.5, // 50%- accuracy = reduce confidence
- max_confidence_delta: 0.2, // Max adjustment ±0.2
- };
- let learner = Arc::new(PilotLearner::with_config(store.clone(), learner_config));
- println!("✓ Created PilotLearner with custom config");
-
- // 3. Create LlmPilot with feedback learning
- let client = LlmClient::for_model("gpt-4o-mini");
- let pilot = LlmPilot::new(client, PilotConfig::default()).with_learner(learner.clone());
- println!("✓ Created LlmPilot with feedback learner");
-
- // 4. Simulate some retrieval operations with feedback
- println!("\n=== Simulating Retrieval with Feedback ===\n");
-
- // Simulate 10 retrieval operations
- for i in 0..10 {
- let decision_id = DecisionId(i);
- let was_correct = i % 3 != 0; // 66% accuracy
- let confidence = 0.7 + (i as f64 * 0.02);
-
- // Create feedback record
- let record = FeedbackRecord::new(
- decision_id,
- was_correct,
- confidence,
- InterventionPoint::Fork,
- 12345, // query_hash
- 67890, // path_hash
- );
-
- // Record feedback
- pilot.record_feedback(record);
-
- println!(
- "Decision {}: {} (confidence: {:.2})",
- i,
- if was_correct { "✓ Correct" } else { "✗ Incorrect" },
- confidence
- );
- }
-
- // 5. View learning statistics
- println!("\n=== Learning Statistics ===\n");
-
- let stats = store.intervention_stats();
- println!("Fork Point Statistics:");
- println!(" Total decisions: {}", stats.fork.total);
- println!(" Correct: {}", stats.fork.correct);
- println!(" Accuracy: {:.1}%", stats.fork.accuracy() * 100.0);
- println!(
- " Avg confidence (correct): {:.2}",
- stats.fork.avg_confidence_correct
- );
- println!(
- " Avg confidence (incorrect): {:.2}",
- stats.fork.avg_confidence_incorrect
- );
-
- let overall = store.overall_accuracy();
- println!("\nOverall accuracy: {:.1}%", overall * 100.0);
- println!("Total records: {}", store.total_records());
-
- // 6. Check if learner has enough data
- println!("\n=== Learner Status ===\n");
- if learner.has_sufficient_data() {
- println!("✓ Learner has sufficient data for adjustments");
-
- // Get adjustment for similar context
- let adjustment = learner.get_adjustment(InterventionPoint::Fork, 12345, 67890);
- println!("\nAdjustment for similar context:");
- println!(" Confidence delta: {:.3}", adjustment.confidence_delta);
- println!(" Algorithm weight: {:.2}", adjustment.algorithm_weight);
- println!(
- " Skip intervention: {}",
- adjustment.skip_intervention
- );
- } else {
- println!("✗ Learner needs more data before adjusting");
- }
-
- // 7. Demonstrate persistence (optional)
- println!("\n=== Persistence (Optional) ===\n");
-
- let persistent_config = FeedbackStoreConfig::with_persistence("/tmp/feedback.json");
- let _persistent_store = FeedbackStore::new(persistent_config);
-
- // In a real app, you would:
- // - Load existing feedback at startup: persistent_store.load()?
- // - Save periodically: persistent_store.persist()?
-
- println!("To enable persistence, create FeedbackStore with:");
- println!(" FeedbackStoreConfig::with_persistence(\"/path/to/feedback.json\")");
-
- println!("\n=== Example Complete ===");
- Ok(())
-}
diff --git a/examples/rust/html_parser.rs b/examples/rust/html_parser.rs
deleted file mode 100644
index e41aaea7..00000000
--- a/examples/rust/html_parser.rs
+++ /dev/null
@@ -1,291 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! HTML Parser Example.
-//!
-//! This example demonstrates how to parse HTML documents using vectorless.
-//!
-//! # Features
-//!
-//! - Parses HTML5 documents
-//! - Extracts heading hierarchy (h1-h6)
-//! - Extracts content from paragraphs, lists, tables
-//! - Extracts metadata from <head> (title, description, etc.)
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example html_parser
-//! ```
-
-use vectorless::parser::{DocumentParser, HtmlConfig, HtmlParser};
-
-#[tokio::main]
-async fn main() -> vectorless::Result<()> {
- println!("=== HTML Parser Example ===\n");
-
- // 1. Basic HTML parsing
- println!("--- Step 1: Basic HTML Parsing ---\n");
- demo_basic_parsing().await?;
-
- // 2. Parsing with metadata
- println!("\n--- Step 2: HTML with Metadata ---\n");
- demo_metadata_parsing().await?;
-
- // 3. Complex HTML structure
- println!("\n--- Step 3: Complex HTML Structure ---\n");
- demo_complex_structure().await?;
-
- // 4. Configuration options
- println!("\n--- Step 4: Configuration Options ---\n");
- demo_configuration().await?;
-
- // 5. Integration with Engine
- println!("\n--- Step 5: Integration with Engine ---\n");
- demo_engine_integration();
-
- println!("\n=== Done ===");
- Ok(())
-}
-
-/// Demonstrate basic HTML parsing.
-async fn demo_basic_parsing() -> vectorless::Result<()> {
- let parser = HtmlParser::new();
- let html = r#"
-
-
-Basic Document
-
- Main Title
- This is the introduction paragraph.
-
- Section 1
- Content for section 1.
-
- Section 2
- Content for section 2.
- Subsection 2.1
- Detailed content here.
-
-
-"#;
-
- let result = parser.parse(html).await?;
-
- println!("Document: {}", result.meta.name);
- println!("Nodes extracted: {}\n", result.nodes.len());
-
- for node in &result.nodes {
- println!(" {} {} (level {})",
- "•".repeat(node.level),
- node.title,
- node.level
- );
- if !node.content.is_empty() {
- let preview: String = node.content.chars().take(50).collect();
- println!(" Content: {}...", preview);
- }
- }
-
- Ok(())
-}
-
-/// Demonstrate parsing HTML with metadata.
-async fn demo_metadata_parsing() -> vectorless::Result<()> {
- let parser = HtmlParser::new();
- let html = r#"
-
-
-
- Technical Documentation
-
-
-
-
-
-
- API Reference
- Introduction to the API.
-
-
-"#;
-
- let result = parser.parse(html).await?;
-
- println!("Metadata extracted:");
- println!(" Title: {}", result.meta.name);
- println!(" Description: {:?}", result.meta.description);
- println!(" Format: {:?}", result.meta.format);
- println!(" Lines: {}", result.meta.line_count);
-
- Ok(())
-}
-
-/// Demonstrate parsing complex HTML structure.
-async fn demo_complex_structure() -> vectorless::Result<()> {
- let parser = HtmlParser::new();
- let html = r#"
-
-
-
- Complex Document
-
- Lists
-
- - First item
- - Second item
- - Third item
-
-
-
- - Step one
- - Step two
- - Step three
-
-
- Table
-
- | Name | Value |
- | Option A | 100 |
- | Option B | 200 |
-
-
- Code Block
- fn main() {
- println!("Hello, World!");
-}
-
- Blockquote
-
- This is a quoted text from another source.
- It can span multiple lines.
-
-
-
-"#;
-
- let result = parser.parse(html).await?;
-
- println!("Nodes with complex content:\n");
- for node in &result.nodes {
- println!(" [Level {}] {}", node.level, node.title);
- if node.content.contains("•") || node.content.contains("1.") {
- println!(" → Contains list content");
- }
- if node.content.contains("|") {
- println!(" → Contains table content");
- }
- if node.content.contains("```") {
- println!(" → Contains code block");
- }
- if node.content.contains(">") {
- println!(" → Contains blockquote");
- }
- }
-
- Ok(())
-}
-
-/// Demonstrate configuration options.
-async fn demo_configuration() -> vectorless::Result<()> {
- // Default configuration
- let _default_parser = HtmlParser::new();
- println!("Default config:");
- println!(" - max_heading_level: 6");
- println!(" - include_code_blocks: true");
- println!(" - merge_small_nodes: true");
- println!(" - min_content_length: 50\n");
-
- // Custom configuration
- let config = HtmlConfig::new()
- .with_max_heading_level(3) // Only h1-h3
- .with_code_blocks(false) // Exclude code
- .with_min_content_length(20) // Smaller threshold
- .with_default_title("Overview");
-
- let custom_parser = HtmlParser::with_config(config);
- println!("Custom config:");
- println!(" - max_heading_level: 3");
- println!(" - include_code_blocks: false");
- println!(" - min_content_length: 20");
- println!(" - default_title: \"Overview\"\n");
-
- // Parse with custom config
- let html = r#"
-
-
- Title
- Short.
- This heading is ignored (level > 3)
- This content goes to parent.
-
-
-"#;
-
- let result = custom_parser.parse(html).await?;
- println!("Nodes with max_level=3: {}", result.nodes.len());
-
- // Show preset configs
- println!("\nPreset configurations:");
- let simple = HtmlConfig::simple();
- println!(" HtmlConfig::simple():");
- println!(" - merge_small_nodes: {}", simple.merge_small_nodes);
- println!(" - min_content_length: {}", simple.min_content_length);
-
- let no_code = HtmlConfig::no_code_blocks();
- println!(" HtmlConfig::no_code_blocks():");
- println!(" - include_code_blocks: {}", no_code.include_code_blocks);
-
- Ok(())
-}
-
-/// Demonstrate integration with Engine.
-fn demo_engine_integration() {
- println!("Integration with Engine:\n");
-
- println!("```rust");
- println!("use vectorless::{{EngineBuilder, IndexContext}};");
- println!("use vectorless::parser::DocumentFormat;");
- println!();
- println!("# #[tokio::main]");
- println!("# async fn main() -> vectorless::Result<()> {{");
- println!(" let engine = EngineBuilder::new()");
- println!(" .with_workspace(\"./workspace\")");
- println!(" .build()");
- println!(" .await?;");
- println!();
- println!(" // Method 1: From HTML file");
- println!(" let doc_id = engine.index(");
- println!(" IndexContext::from_path(\"./documentation.html\")");
- println!(" ).await?;");
- println!();
- println!(" // Method 2: From HTML content");
- println!(" let html = r#\"");
- println!("");
- println!("My Doc");
- println!("");
- println!(" Introduction
");
- println!(" Content here...
");
- println!("");
- println!("");
- println!("\"#;");
- println!();
- println!(" let doc_id = engine.index(");
- println!(" IndexContext::from_content(html, DocumentFormat::Html)");
- println!(" .with_name(\"my-document\")");
- println!(" ).await?;");
- println!();
- println!(" // Query the indexed document");
- println!(" let result = engine.query(&doc_id, \"What is the introduction?\").await?;");
- println!(" println!(\"{{}}\", result.content);");
- println!();
- println!(" Ok(())");
- println!("}}");
- println!("```\n");
-
- println!("Supported file extensions:");
- println!(" - .html, .htm → HTML format");
- println!(" - .md, .markdown → Markdown format");
- println!(" - .pdf → PDF format");
- println!(" - .docx → Word document");
-}
diff --git a/examples/rust/index.rs b/examples/rust/index.rs
deleted file mode 100644
index bd2b6aac..00000000
--- a/examples/rust/index.rs
+++ /dev/null
@@ -1,106 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Index example - demonstrates the document indexing.
-//!
-//! This example shows how to:
-//! 1. Create an index pipeline executor
-//! 2. Configure pipeline options
-//! 3. Execute the pipeline on a document
-//! 4. Inspect the generated document tree
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example index
-//! ```
-
-use vectorless::index::{IndexInput, PipelineExecutor, PipelineOptions};
-
-#[tokio::main]
-async fn main() -> vectorless::Result<()> {
- println!("=== Index Pipeline Example ===\n");
-
- // 1. Create pipeline executor
- let mut executor = PipelineExecutor::new();
- println!("✓ Created pipeline executor\n");
-
- // 2. Configure pipeline options
- let options = PipelineOptions::default();
- println!("Pipeline options:");
- println!(" - Generate IDs: {}", options.generate_ids);
- println!(" - Generate description: {}", options.generate_description);
- println!();
-
- // 3. Create input from a file
- let input = IndexInput::file("./README.md");
- println!("Input: ./README.md\n");
-
- // 4. Execute the pipeline
- println!("Executing pipeline stages:");
- println!(" [1/5] Parse - Parse document into tree structure");
- println!(" [2/5] Build - Build document tree with metadata");
- println!(" [3/5] Enhance - Add ToC and section detection");
- println!(" [4/5] Enrich - Generate summaries for nodes");
- println!(" [5/5] Optimize - Optimize tree structure");
- println!();
-
- let result = executor.execute(input, options).await?;
- println!("✓ Pipeline completed\n");
-
- // 5. Inspect the result
- println!("Results:");
- println!(" - Document name: {}", result.name);
-
- if let Some(ref description) = result.description {
- let preview: String = description.chars().take(100).collect();
- println!(" - Description: {}...", preview);
- }
-
- if let Some(ref tree) = result.tree {
- println!(" - Tree nodes: {}", tree.node_count());
- println!();
-
- // Print tree structure (first 2 levels)
- println!("Document structure:");
- print_tree_structure(&tree, tree.root(), 0, 2);
- }
-
- if let Some(page_count) = result.page_count {
- println!("\n - Pages: {}", page_count);
- }
-
- println!("\n=== Done ===");
- Ok(())
-}
-
-/// Print tree structure up to a maximum depth.
-fn print_tree_structure(
- tree: &vectorless::document::DocumentTree,
- node_id: vectorless::document::NodeId,
- current_depth: usize,
- max_depth: usize,
-) {
- if current_depth > max_depth {
- return;
- }
-
- let indent = " ".repeat(current_depth);
-
- if let Some(node) = tree.get(node_id) {
- let children = tree.children(node_id);
- let marker = if children.is_empty() {
- "└─"
- } else {
- "├─"
- };
- println!(
- "{}{} {} (depth: {})",
- indent, marker, node.title, node.depth
- );
-
- for child_id in children {
- print_tree_structure(tree, child_id, current_depth + 1, max_depth);
- }
- }
-}
diff --git a/examples/rust/markdownflow.rs b/examples/rust/markdownflow.rs
index 60e96f54..7d7988c0 100644
--- a/examples/rust/markdownflow.rs
+++ b/examples/rust/markdownflow.rs
@@ -19,8 +19,8 @@
//! OPENAI_API_KEY=sk-... cargo run --example markdown_flow
//! ```
-use vectorless::Engine;
-use vectorless::client::{IndexContext, IndexOptions};
+use vectorless::EngineBuilder;
+use vectorless::client::{IndexContext, IndexOptions, QueryContext};
/// Sample markdown content for demonstration.
const SAMPLE_MARKDOWN: &str = r#"
@@ -43,7 +43,7 @@ async fn main() -> Result<(), Box> {
// Step 1: Create a Vectorless client (no API key needed - LLM config is automatic)
println!("Step 1: Creating Vectorless client...");
- let client = Engine::builder()
+ let client = EngineBuilder::new()
.with_workspace("./workspace")
.build()
.await
@@ -62,29 +62,19 @@ async fn main() -> Result<(), Box> {
// Check if we should generate summaries (requires API key)
println!(" - API key detected, generating summaries...");
- let doc_id = client
+ let index_result = client
.index(IndexContext::from_path(&md_path).with_options(IndexOptions::new().with_summaries()))
.await?;
+ let doc_id = index_result.doc_id().unwrap().to_string();
println!(" - Document indexed successfully");
println!(" - Document ID: {}", doc_id);
println!();
- // Step 3: Show document structure in JSON format
- println!("Step 3: Document structure (JSON):");
- println!();
-
- match client.get_structure(&doc_id).await {
- Ok(tree) => {
- // Export to JSON format (PageIndex compatible)
- let structure = tree.to_structure_json("sample.md");
- let json = serde_json::to_string_pretty(&structure)
- .unwrap_or_else(|_| "Failed to serialize".to_string());
- println!("{}", json);
- }
- Err(e) => {
- println!(" - Error getting structure: {}", e);
- }
+ // Step 3: List indexed documents
+ println!("Step 3: Indexed documents:");
+ for doc in client.list().await? {
+ println!(" - {} ({})", doc.name, doc.id);
}
println!();
@@ -96,7 +86,7 @@ async fn main() -> Result<(), Box> {
for query in queries {
println!(" Query: \"{}\"", query);
- match client.query(&doc_id, query).await {
+ match client.query(QueryContext::new(query).with_doc_id(&doc_id)).await {
Ok(result) => {
if result.content.is_empty() {
println!(" - No relevant content found");
diff --git a/examples/rust/memo_cache.rs b/examples/rust/memo_cache.rs
deleted file mode 100644
index d4655189..00000000
--- a/examples/rust/memo_cache.rs
+++ /dev/null
@@ -1,264 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! MemoStore verification example.
-//!
-//! This example demonstrates the LLM memoization system working in a real scenario,
-//! showing cache hits/misses and cost savings.
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example memo_cache
-//! ```
-//!
-//! # Environment
-//!
-//! Set OPENAI_API_KEY or ANTHROPIC_API_KEY for full functionality.
-//! The example will still run without API keys (using fallback mode).
-
-use chrono::Duration;
-use vectorless::memo::{MemoKey, MemoOpType, MemoStore, MemoValue};
-
-fn print_separator(title: &str) {
- println!("\n{}", "=".repeat(60));
- println!(" {}", title);
- println!("{}", "=".repeat(60));
-}
-
-fn main() -> vectorless::Result<()> {
- println!("=== MemoStore Verification Example ===\n");
-
- // ============================================================
- // Part 1: Basic MemoStore Operations
- // ============================================================
- print_separator("Part 1: Basic Operations");
-
- let store = MemoStore::new()
- .with_ttl(Duration::days(7))
- .with_model("gpt-4o")
- .with_version(1);
-
- println!("Created MemoStore with:");
- println!(" - TTL: 7 days");
- println!(" - Model: gpt-4o");
- println!(" - Version: 1");
-
- // Create a summary cache key
- let content = "This is a long document about machine learning...";
- let content_fp = vectorless::utils::fingerprint::Fingerprint::from_str(content);
- let key = MemoKey::summary(&content_fp).with_model("gpt-4o").with_version(1);
-
- println!("\nCache key created:");
- println!(" - Op type: {:?}", key.op_type);
- println!(" - Input FP: {}", key.input_fp);
-
- // Check cache (should miss)
- println!("\nChecking cache (first time)...");
- let cached = store.get(&key);
- println!(" Cache hit: {}", cached.is_some());
-
- // Store a value
- println!("\nStoring summary...");
- let summary = "Machine learning is a subset of AI that enables systems to learn from data.";
- store.put_with_tokens(key.clone(), MemoValue::Summary(summary.to_string()), 500);
- println!(" Stored: \"{}\"", summary);
- println!(" Tokens saved estimate: 500");
-
- // Check cache again (should hit)
- println!("\nChecking cache (second time)...");
- let cached = store.get(&key);
- println!(" Cache hit: {}", cached.is_some());
- if let Some(value) = cached {
- println!(" Value: \"{}\"", value.as_summary().unwrap_or("(not a summary)"));
- }
-
- // ============================================================
- // Part 2: Statistics Tracking
- // ============================================================
- print_separator("Part 2: Statistics Tracking");
-
- // Create a new store for this demo
- let store = MemoStore::with_capacity(100)
- .with_model("gpt-4o-mini");
-
- println!("Simulating cache usage...\n");
-
- // Simulate 10 operations
- let operations = [
- ("doc1", "Content about Rust programming"),
- ("doc2", "Introduction to machine learning"),
- ("doc1", "Content about Rust programming"), // Repeat - should hit
- ("doc3", "Deep learning fundamentals"),
- ("doc2", "Introduction to machine learning"), // Repeat - should hit
- ("doc1", "Content about Rust programming"), // Repeat - should hit
- ("doc4", "Natural language processing"),
- ("doc3", "Deep learning fundamentals"), // Repeat - should hit
- ("doc5", "Computer vision basics"),
- ("doc2", "Introduction to machine learning"), // Repeat - should hit
- ];
-
- let mut hits = 0u64;
- let mut misses = 0u64;
-
- for (i, (doc_id, content)) in operations.iter().enumerate() {
- let content_fp = vectorless::utils::fingerprint::Fingerprint::from_str(content);
- let key = MemoKey::summary(&content_fp);
-
- if let Some(_value) = store.get(&key) {
- hits += 1;
- println!(" [{:2}] {} - CACHE HIT", i + 1, doc_id);
- } else {
- misses += 1;
- println!(" [{:2}] {} - cache miss (storing...)", i + 1, doc_id);
- store.put_with_tokens(key, MemoValue::Summary(format!("Summary of {}", content)), 100);
- }
- }
-
- println!("\nStatistics:");
- println!(" - Hits: {}", hits);
- println!(" - Misses: {}", misses);
- println!(" - Hit rate: {:.1}%", (hits as f64 / (hits + misses) as f64) * 100.0);
-
- // ============================================================
- // Part 3: Cache Invalidation
- // ============================================================
- print_separator("Part 3: Cache Invalidation");
-
- let store = MemoStore::new().with_model("gpt-4o");
-
- // Store different operation types
- let fp1 = vectorless::utils::fingerprint::Fingerprint::from_str("content1");
- let fp2 = vectorless::utils::fingerprint::Fingerprint::from_str("content2");
-
- store.put(MemoKey::summary(&fp1), MemoValue::Summary("Summary 1".to_string()));
- store.put(MemoKey::summary(&fp2), MemoValue::Summary("Summary 2".to_string()));
- store.put(
- MemoKey::pilot_decision(&fp1, &fp2),
- MemoValue::PilotDecision(vectorless::memo::PilotDecisionValue {
- selected_idx: 0,
- confidence: 0.9,
- reasoning: "Test decision".to_string(),
- }),
- );
-
- println!("Stored 3 entries:");
- println!(" - 2 Summary entries");
- println!(" - 1 PilotDecision entry");
- println!(" - Total: {} entries", store.len());
-
- // Invalidate by operation type
- println!("\nInvalidating all Summary entries...");
- let removed = store.invalidate_by_op_type(MemoOpType::Summary);
- println!(" Removed: {} entries", removed);
- println!(" Remaining: {} entries", store.len());
-
- // ============================================================
- // Part 4: Persistence
- // ============================================================
- print_separator("Part 4: Persistence");
-
- let temp_dir = tempfile::TempDir::new().expect("Failed to create temp dir");
- let cache_path = temp_dir.path().join("memo_cache.json");
-
- println!("Cache path: {:?}", cache_path);
-
- // Create and populate store
- let store = MemoStore::new().with_model("gpt-4o");
-
- for i in 0..5 {
- let content = format!("Document content {}", i);
- let fp = vectorless::utils::fingerprint::Fingerprint::from_str(&content);
- store.put(
- MemoKey::summary(&fp),
- MemoValue::Summary(format!("Summary {}", i)),
- );
- }
- println!("Created store with {} entries", store.len());
-
- // Note: save/load are async, skip for this sync example
- println!("\n(Async save/load skipped in sync example)");
- println!("Use store.save(&path).await and store.load(&path).await in async context");
-
- // ============================================================
- // Part 5: Real-World Scenario Simulation
- // ============================================================
- print_separator("Part 5: Real-World Scenario");
-
- println!("Simulating a document query session...\n");
-
- let store = MemoStore::new()
- .with_ttl(Duration::hours(24))
- .with_model("gpt-4o-mini");
-
- // Simulate multiple queries to the same document
- let document_content = r#"
- # Vectorless Documentation
-
- Vectorless is a hierarchical, reasoning-native document intelligence engine.
- It provides tree-based document understanding without vector databases.
-
- ## Features
- - Multi-format parsing (Markdown, PDF, DOCX)
- - LLM-powered summarization
- - Adaptive retrieval strategies
- "#;
-
- let doc_fp = vectorless::utils::fingerprint::Fingerprint::from_str(document_content);
-
- // Simulate query context fingerprints
- let queries = [
- ("What is Vectorless?", 0.85),
- ("How does it work?", 0.72),
- ("What formats are supported?", 0.91),
- ("What is Vectorless?", 0.85), // Repeat
- ("How does it work?", 0.72), // Repeat
- ];
-
- println!("Processing {} queries...\n", queries.len());
-
- for (i, (query, confidence)) in queries.iter().enumerate() {
- let query_fp = vectorless::utils::fingerprint::Fingerprint::from_str(query);
- let key = MemoKey::pilot_decision(&doc_fp, &query_fp);
-
- if let Some(_value) = store.get(&key) {
- println!(" [{:2}] \"{}\" - CACHED (confidence: {:.2})", i + 1, query, confidence);
- } else {
- println!(" [{:2}] \"{}\" - Computing... (confidence: {:.2})", i + 1, query, confidence);
- store.put_with_tokens(
- key,
- MemoValue::PilotDecision(vectorless::memo::PilotDecisionValue {
- selected_idx: 0,
- confidence: *confidence as f32,
- reasoning: format!("Reasoning for: {}", query),
- }),
- 150, // ~150 tokens per pilot decision
- );
- }
- }
-
- // Final statistics
- // Note: get() updates entry-level hits, but global stats are only
- // updated by get_or_compute(). For accurate global stats, use get_or_compute.
- println!("\n=== Final Statistics ===");
- println!(" Cache entries: {}", store.len());
- println!("\nNote: Global stats (hits/misses/tokens_saved) are tracked by");
- println!("get_or_compute(), not by direct get() calls. For accurate tracking,");
- println!("use get_or_compute() in production code.");
-
- // Cost estimation (based on manual tracking above)
- let manual_hits = 2u64; // Queries 4 and 5 were cache hits
- let tokens_per_decision = 150u64;
- let tokens_saved = manual_hits * tokens_per_decision;
- let cost_per_1k_tokens = 0.0015; // GPT-4o-mini input
- let saved_cost = (tokens_saved as f64 / 1000.0) * cost_per_1k_tokens;
- println!("\n Manual calculation:");
- println!(" Cache hits: {}", manual_hits);
- println!(" Tokens saved: {}", tokens_saved);
- println!(" Estimated cost saved: ${:.4}", saved_cost);
-
- println!("\n=== Verification Complete ===");
- println!("MemoStore is working correctly!");
-
- Ok(())
-}
diff --git a/examples/rust/multi_format.rs b/examples/rust/multi_format.rs
deleted file mode 100644
index f146b851..00000000
--- a/examples/rust/multi_format.rs
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Multi-format document processing example.
-//!
-//! This example demonstrates how to work with different
-//! document formats (Markdown, PDF, DOCX, HTML).
-//!
-//! # What you'll learn:
-//! - How to index documents of different formats
-//! - How format detection works
-//! - How to configure format-specific parsing options
-//! - How to handle mixed-format document sets
-//!
-//! # Supported formats:
-//! - **Markdown** (.md): Full support with ToC extraction
-//! - **PDF** (.pdf): Text extraction, structure inference
-//! - **DOCX** (.docx): Word document parsing
-//! - **HTML** (.html, .htm): Web page parsing (planned)
-//! - **Plain text** (.txt): Basic text parsing (planned)
-//!
-//! # Format-specific considerations:
-//!
-//! ## Markdown
-//! - Best format for structured documents
-//! - Automatic heading hierarchy detection
-//! - Code block handling
-//!
-//! ## PDF
-//! - Text extraction quality varies
-//! - No explicit structure (inferred from fonts/spacing)
-//! - Tables and images not supported
-//!
-//! ## DOCX
-//! - Good structure preservation
-//! - Styles mapped to hierarchy
-//! - Limited formatting support
-//!
-//! # TODO: Implementation steps
-//!
-//! 1. Detect document format from extension or content
-//! 2. Configure format-specific parser options
-//! 3. Index documents of mixed formats
-//! 4. Query across all formats
-
-// TODO: Implement multi-format example
-// ```
-// use vectorless::client::{Engine, EngineBuilder};
-// use vectorless::parser::DocumentFormat;
-//
-// async fn index_multiple_formats(engine: &Engine) {
-// // Index different formats
-// let md_doc = engine.index("./README.md").await?;
-// let pdf_doc = engine.index("./paper.pdf").await?;
-// let docx_doc = engine.index("./report.docx").await?;
-//
-// // Query works across all formats
-// let result = engine.query(&md_doc, "What is this about?").await?;
-// }
-// ```
-
-fn main() {
- // TODO: Show multi-format indexing and querying
- //
- // // Index documents of different formats
- // let md_id = engine.index("./docs/guide.md").await?;
- // let pdf_id = engine.index("./docs/paper.pdf").await?;
- // let docx_id = engine.index("./docs/report.docx").await?;
- //
- // // Each can be queried independently
- // for doc_id in &[md_id, pdf_id, docx_id] {
- // let result = engine.query(doc_id, "summary").await?;
- // println!("Result: {}", result.content);
- // }
-
- println!("TODO: Implement multi_format example");
-}
diff --git a/examples/rust/reference_following.rs b/examples/rust/reference_following.rs
deleted file mode 100644
index 1f95cbf8..00000000
--- a/examples/rust/reference_following.rs
+++ /dev/null
@@ -1,191 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Reference Following Example
-//!
-//! This example demonstrates the reference following feature which allows
-//! the retrieval system to follow in-document references like
-//! "see Appendix G" or "refer to Table 5.3".
-//!
-//! # What you'll learn:
-//! - How references are extracted from document content
-//! - How references are resolved to actual nodes
-//! - How to use ReferenceFollower to expand search results
-//!
-//! # Key concepts:
-//!
-//! ## Reference Types
-//! - Section: "see Section 2.1", "Section 3.2.1"
-//! - Appendix: "see Appendix G", "Appendix A"
-//! - Table: "Table 5.3", "refer to Table 1"
-//! - Figure: "Figure 2.1", "fig. 3"
-//! - Page: "see page 42", "p. 15"
-//!
-//! ## Resolution Flow
-//! ```text
-//! Extract References → Resolve to Nodes → Follow → Expand Context
-//! ```
-
-use vectorless::document::{
- DocumentTree, ReferenceExtractor,
-};
-use vectorless::retrieval::{
- expand_with_references, ReferenceConfig, ReferenceFollower,
-};
-
-fn main() {
- println!("=== Reference Following Example ===\n");
-
- // 1. Create a document tree with references
- let tree = create_document_with_references();
- println!("Created document tree with {} nodes\n", tree.node_count());
-
- // 2. Build retrieval index
- let index = tree.build_retrieval_index();
- println!("Built retrieval index\n");
-
- // 3. Demonstrate reference extraction
- println!("--- Reference Extraction ---\n");
-
- let content = "For more details, see Section 2.1 and Appendix G. The data is shown in Table 5.3.";
- let refs = ReferenceExtractor::extract(content);
-
- println!("Content: \"{}\"\n", content);
- println!("Extracted {} references:", refs.len());
- for r#ref in &refs {
- println!(
- " - {:?}: '{}' -> target '{}'",
- r#ref.ref_type, r#ref.ref_text, r#ref.target_id
- );
- }
- println!();
-
- // 4. Demonstrate reference resolution
- println!("--- Reference Resolution ---\n");
-
- let resolved_refs = ReferenceExtractor::extract_and_resolve(content, &tree, &index);
- println!("Resolved references:");
- for r#ref in &resolved_refs {
- let status = if r#ref.is_resolved() {
- format!("resolved (confidence: {:.2})", r#ref.confidence)
- } else {
- "unresolved".to_string()
- };
- println!(
- " - {:?}: '{}' -> {}",
- r#ref.ref_type, r#ref.target_id, status
- );
- }
- println!();
-
- // 5. Demonstrate reference following
- println!("--- Reference Following ---\n");
-
- let config = ReferenceConfig {
- max_depth: 3,
- max_references: 10,
- follow_pages: true,
- follow_tables_figures: true,
- min_confidence: 0.3,
- ..Default::default()
- };
- let follower = ReferenceFollower::new(config);
-
- // Get the financial section node (which contains references)
- let financial_node = find_node_by_title(&tree, "Financial Summary");
- if let Some(node_id) = financial_node {
- let followed = follower.follow_from_node(&tree, &index, node_id);
-
- println!("Following references from 'Financial Summary':");
- for f in &followed {
- let target = if let Some(target_id) = f.target_node {
- let title = tree.get(target_id).map(|n| n.title.as_str()).unwrap_or("?");
- format!("-> '{}' (depth {})", title, f.depth)
- } else {
- "-> (unresolved)".to_string()
- };
- println!(
- " - {:?} '{}' {}",
- f.reference.ref_type, f.reference.target_id, target
- );
- }
- }
- println!();
-
- // 6. Demonstrate expansion with references
- println!("--- Expansion with References ---\n");
-
- let initial_nodes: Vec<_> = tree.children(tree.root());
- println!("Initial nodes: {} (root's children)", initial_nodes.len());
-
- let expansion = expand_with_references(&tree, &index, &initial_nodes, None);
-
- println!(
- "After reference expansion: {} total nodes, {} new",
- expansion.all_nodes().len(),
- expansion.expanded_nodes.len()
- );
-
- if expansion.has_expansion() {
- println!("\nExpanded nodes:");
- for node_id in expansion.new_nodes() {
- if let Some(node) = tree.get(*node_id) {
- println!(" - {}", node.title);
- }
- }
- }
- println!();
-
- // 7. Show configuration options
- println!("--- Configuration Options ---\n");
-
- let conservative = ReferenceConfig::conservative();
- let aggressive = ReferenceConfig::aggressive();
-
- println!("Conservative config:");
- println!(" - Max depth: {}", conservative.max_depth);
- println!(" - Max references: {}", conservative.max_references);
-
- println!("\nAggressive config:");
- println!(" - Max depth: {}", aggressive.max_depth);
- println!(" - Max references: {}", aggressive.max_references);
-
- println!("\n=== Done ===");
-}
-
-fn create_document_with_references() -> DocumentTree {
- let mut tree = DocumentTree::new("Annual Report", "Company annual financial report.");
-
- // Main sections
- let _intro = tree.add_child(tree.root(), "Introduction", "Overview of the report.");
- let financial = tree.add_child(
- tree.root(),
- "Financial Summary",
- "Financial overview for 2023. For detailed breakdown, see Section 2.1. Revenue data is in Table 5.3. Additional details in Appendix G.",
- );
- let _appendix = tree.add_child(
- tree.root(),
- "Appendix G",
- "Detailed financial tables and data.",
- );
-
- // Subsections
- tree.add_child(
- financial,
- "2.1 Revenue",
- "Revenue increased by 15% year over year. See Table 5.3 for breakdown.",
- );
-
- tree
-}
-
-fn find_node_by_title(tree: &DocumentTree, title: &str) -> Option {
- for node_id in tree.traverse() {
- if let Some(node) = tree.get(node_id) {
- if node.title == title {
- return Some(node_id);
- }
- }
- }
- None
-}
diff --git a/examples/rust/retrieve.rs b/examples/rust/retrieve.rs
deleted file mode 100644
index 62e5ff73..00000000
--- a/examples/rust/retrieve.rs
+++ /dev/null
@@ -1,263 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Retrieve example - demonstrates the retrieval pipeline.
-//!
-//! This example shows how to:
-//! 1. Create a pipeline retriever
-//! 2. Configure retrieval options
-//! 3. Execute retrieval queries
-//! 4. Use the orchestrator for advanced control
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example retrieve
-//! ```
-
-use std::sync::Arc;
-use vectorless::document::DocumentTree;
-use vectorless::retrieval::{
- PipelineRetriever, RetrieveOptions, Retriever, StrategyPreference,
- pipeline::RetrievalOrchestrator,
- stages::{AnalyzeStage, EvaluateStage, PlanStage, SearchStage},
-};
-
-#[tokio::main]
-async fn main() -> vectorless::Result<()> {
- println!("=== Retrieval Pipeline Example ===\n");
-
- // 1. Create a sample document tree
- let tree = create_sample_tree();
- println!(
- "✓ Created sample document tree ({} nodes)\n",
- tree.node_count()
- );
-
- // 2. Method A: Use PipelineRetriever (simple API)
- println!("--- Method A: PipelineRetriever (Simple API) ---\n");
- demo_pipeline_retriever(&tree).await?;
-
- // 3. Method B: Use RetrievalOrchestrator directly (advanced API)
- println!("\n--- Method B: RetrievalOrchestrator (Advanced API) ---\n");
- demo_orchestrator(&tree).await?;
-
- println!("\n=== Done ===");
- Ok(())
-}
-
-/// Demonstrate PipelineRetriever (simple API).
-async fn demo_pipeline_retriever(tree: &DocumentTree) -> vectorless::Result<()> {
- // Create retriever with configuration
- let retriever = PipelineRetriever::new()
- .with_max_backtracks(5)
- .with_max_iterations(10);
-
- println!("PipelineRetriever configuration:");
- println!(" - Max backtracks: 5");
- println!(" - Max iterations: 10");
- println!();
-
- // Configure retrieval options
- let options = RetrieveOptions {
- top_k: 5,
- beam_width: 3,
- max_iterations: 5,
- max_tokens: 4000,
- sufficiency_check: true,
- include_content: true,
- include_summaries: true,
- strategy: StrategyPreference::Auto,
- ..Default::default()
- };
-
- println!("RetrieveOptions:");
- println!(" - Top K: {}", options.top_k);
- println!(" - Beam width: {}", options.beam_width);
- println!(" - Max tokens: {}", options.max_tokens);
- println!(" - Sufficiency check: {}", options.sufficiency_check);
- println!();
-
- // Execute query
- let query = "What is the main architecture?";
- println!("Query: \"{}\"\n", query);
-
- let response = retriever
- .retrieve(tree, query, &options)
- .await
- .map_err(|e| vectorless::Error::Retrieval(e.to_string()))?;
-
- // Display results
- println!("Results:");
- println!(" - Is sufficient: {}", response.is_sufficient);
- println!(" - Confidence: {:.2}", response.confidence);
- println!(" - Strategy used: {}", response.strategy_used);
- println!(" - Tokens used: {}", response.tokens_used);
- println!(" - Results count: {}", response.results.len());
-
- if !response.results.is_empty() {
- println!("\n Top results:");
- for (i, result) in response.results.iter().take(3).enumerate() {
- println!(
- " {}. {} (score: {:.2})",
- i + 1,
- result.title,
- result.score
- );
- }
- }
-
- Ok(())
-}
-
-/// Demonstrate RetrievalOrchestrator (advanced API).
-async fn demo_orchestrator(tree: &DocumentTree) -> vectorless::Result<()> {
- // Build orchestrator with explicit stages
- let mut orchestrator = RetrievalOrchestrator::new()
- .with_max_backtracks(3)
- .with_max_iterations(5)
- .stage(AnalyzeStage::new())
- .stage(PlanStage::new())
- .stage(SearchStage::new())
- .stage(EvaluateStage::new());
-
- println!("Orchestrator stages:");
- if let Ok(names) = orchestrator.stage_names() {
- for (i, name) in names.iter().enumerate() {
- println!(" {}. {}", i + 1, name);
- }
- }
- println!();
-
- // Get execution groups (shows parallel potential)
- if let Ok(groups) = orchestrator.get_execution_groups() {
- println!("Execution groups: {} groups", groups.len());
- for (i, group) in groups.iter().enumerate() {
- let parallel = if group.parallel {
- " (can parallelize)"
- } else {
- ""
- };
- println!(
- " Group {}: {} stages{}",
- i,
- group.stage_indices.len(),
- parallel
- );
- }
- }
- println!();
-
- // Execute query
- let query = "How does the pipeline work?";
- println!("Query: \"{}\"\n", query);
-
- let options = RetrieveOptions::default();
- let tree_arc = Arc::new(tree.clone());
- let response = orchestrator
- .execute(tree_arc, query, options)
- .await
- .map_err(|e| vectorless::Error::Retrieval(e.to_string()))?;
-
- println!("Results:");
- println!(" - Is sufficient: {}", response.is_sufficient);
- println!(" - Confidence: {:.2}", response.confidence);
- println!(" - Complexity: {:?}", response.complexity);
- println!(" - Reasoning steps: {}", response.reasoning_chain.len());
-
- if !response.reasoning_chain.is_empty() {
- println!("\n Reasoning chain:");
- for (i, step) in response.reasoning_chain.steps.iter().take(5).enumerate() {
- let title = step.title.as_deref().unwrap_or("(no node)");
- println!(
- " {}. [{}] {} (score: {:.2}): {}",
- i + 1, step.stage, title, step.score, step.reasoning
- );
- }
- }
-
- Ok(())
-}
-
-/// Create a sample document tree for demonstration.
-fn create_sample_tree() -> DocumentTree {
- let mut tree = DocumentTree::new(
- "Vectorless Documentation",
- "A hierarchical document intelligence engine written in Rust.",
- );
-
- // Add sections using the correct API
- let _intro = tree.add_child(
- tree.root(),
- "Introduction",
- "Vectorless is a document intelligence engine written in Rust.",
- );
-
- let arch = tree.add_child(
- tree.root(),
- "Architecture",
- "The system consists of three main components: indexer, retriever, and storage.",
- );
-
- let index_section = tree.add_child(
- arch,
- "Index Pipeline",
- "The index pipeline processes documents into a tree structure with summaries.",
- );
- let retrieve_section = tree.add_child(
- arch,
- "Retrieval Pipeline",
- "The retrieval pipeline finds relevant content using multi-stage processing.",
- );
-
- tree.add_child(
- index_section,
- "Parse Stage",
- "Parses documents (Markdown, PDF, DOCX) into structured content.",
- );
- tree.add_child(
- index_section,
- "Build Stage",
- "Builds the document tree with metadata like page numbers and indices.",
- );
- tree.add_child(
- index_section,
- "Enrich Stage",
- "Generates AI summaries for tree nodes using LLM.",
- );
-
- tree.add_child(
- retrieve_section,
- "Analyze Stage",
- "Analyzes query complexity and extracts keywords for matching.",
- );
- tree.add_child(
- retrieve_section,
- "Plan Stage",
- "Selects retrieval strategy (keyword/semantic/LLM) and search algorithm.",
- );
- tree.add_child(
- retrieve_section,
- "Search Stage",
- "Executes tree traversal (greedy/beam/MCTS) to find relevant content.",
- );
- tree.add_child(
- retrieve_section,
- "Judge Stage",
- "Evaluates sufficiency of collected content, can trigger backtracking.",
- );
-
- let usage = tree.add_child(tree.root(), "Usage", "How to use the vectorless library.");
- tree.add_child(
- usage,
- "Basic Example",
- "Simple usage with default configuration and workspace.",
- );
- tree.add_child(
- usage,
- "Advanced Example",
- "Custom pipeline configuration with LLM and custom stages.",
- );
-
- tree
-}
diff --git a/examples/rust/session.rs b/examples/rust/session.rs
deleted file mode 100644
index d5cfd68d..00000000
--- a/examples/rust/session.rs
+++ /dev/null
@@ -1,205 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Session-based multi-document operations example.
-//!
-//! This example demonstrates the Session API for:
-//! - Managing multiple documents in a single session
-//! - Cross-document queries
-//! - Session caching for improved performance
-//! - Session statistics
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example session
-//! ```
-
-use vectorless::client::{EngineBuilder, IndexContext};
-
-#[tokio::main]
-async fn main() -> Result<(), Box<dyn std::error::Error>> {
- println!("=== Session-Based Multi-Document Example ===\n");
-
- // 1. Create the engine
- println!("Step 1: Creating engine...");
- let engine = EngineBuilder::new()
- .with_workspace("./workspace_session_example")
- .build()
- .await
- .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?;
- println!(" ✓ Engine created\n");
-
- // 2. Create a session for multi-document operations
- println!("Step 2: Creating session...");
- let session = engine.session().await;
- println!(" ✓ Session ID: {}\n", session.id());
-
- // 3. Index multiple documents into the session
- println!("Step 3: Indexing documents...");
-
- // Create sample documents
- let temp_dir = tempfile::tempdir()?;
-
- let doc1_content = r#"# Architecture Guide
-
-## Overview
-
-Vectorless uses a tree-based architecture for document navigation.
-
-## Components
-
-- **Indexer**: Parses documents and builds tree structure
-- **Retriever**: Navigates tree to find relevant content
-- **Workspace**: Manages document persistence
-"#;
-
- let doc2_content = r#"# API Reference
-
-## Engine
-
-The main entry point for vectorless operations.
-
-### Methods
-
-- `index(path)`: Index a document
-- `query(doc_id, question)`: Query a document
-- `list_documents()`: List all documents
-
-## Session
-
-Multi-document operations with caching.
-
-### Methods
-
-- `index(path)`: Index into session
-- `query(doc_id, question)`: Query cached document
-- `query_all(question)`: Query across all documents
-"#;
-
- let doc3_content = r#"# Configuration Guide
-
-## Workspace Settings
-
-The workspace directory stores indexed documents.
-
-```toml
-[storage]
-workspace_dir = "./workspace"
-```
-
-## Retrieval Settings
-
-Configure retrieval behavior:
-
-```toml
-[retrieval]
-top_k = 5
-max_tokens = 4000
-```
-
-## Content Aggregator
-
-Control content aggregation:
-
-```toml
-[retrieval.content]
-enabled = true
-token_budget = 4000
-```
-"#;
-
- // Write sample documents
- let doc1_path = temp_dir.path().join("architecture.md");
- let doc2_path = temp_dir.path().join("api.md");
- let doc3_path = temp_dir.path().join("config.md");
-
- tokio::fs::write(&doc1_path, doc1_content).await?;
- tokio::fs::write(&doc2_path, doc2_content).await?;
- tokio::fs::write(&doc3_path, doc3_content).await?;
-
- // Index into session
- let doc1_id = session.index(IndexContext::from_path(&doc1_path)).await?;
- println!(" ✓ Indexed: architecture.md -> {}", &doc1_id[..8]);
-
- let doc2_id = session.index(IndexContext::from_path(&doc2_path)).await?;
- println!(" ✓ Indexed: api.md -> {}", &doc2_id[..8]);
-
- let doc3_id = session.index(IndexContext::from_path(&doc3_path)).await?;
- println!(" ✓ Indexed: config.md -> {}", &doc3_id[..8]);
- println!();
-
- // 4. List documents in session
- println!("Step 4: Session documents:");
- for doc in session.list_documents() {
- println!(" - {} ({})", doc.name, &doc.id[..8]);
- }
- println!();
-
- // 5. Query individual documents (uses cache)
- println!("Step 5: Query individual documents...");
- let query = "What methods are available?";
-
- println!(" Query: \"{}\"", query);
- let start = std::time::Instant::now();
- let result = session.query(&doc2_id, query).await?;
- let elapsed = start.elapsed();
- println!(" - Time: {:?}", elapsed);
- println!(" - Score: {:.2}", result.score);
- if !result.content.is_empty() {
- let preview: String = result.content.chars().take(100).collect();
- println!(" - Preview: {}...", preview);
- }
- println!();
-
- // 6. Query same document again (should be faster due to cache)
- println!("Step 6: Query cached document (should be faster)...");
- let start = std::time::Instant::now();
- let result = session.query(&doc2_id, "How to list documents?").await?;
- let cached_elapsed = start.elapsed();
- println!(" - Time: {:?}", cached_elapsed);
- println!(" - Score: {:.2}", result.score);
- println!();
-
- // 7. Query across all documents
- println!("Step 7: Cross-document query...");
- let query = "How to configure the workspace?";
- println!(" Query: \"{}\"", query);
-
- let results = session.query_all(query).await?;
- println!(" Found {} relevant documents:", results.len());
-
- for (i, result) in results.iter().enumerate() {
- println!(
- " {}. {} (score: {:.2})",
- i + 1,
- &result.doc_id[..8],
- result.score
- );
- }
- println!();
-
- // 8. Show session statistics
- println!("Step 8: Session statistics:");
- let stats = session.stats();
- println!(" - Documents: {}", session.list_documents().len());
- println!(" - Queries: {}", stats.query_count.get());
- println!(" - Cache hits: {}", stats.cache_hits.get());
- println!(" - Cache misses: {}", stats.cache_misses.get());
- println!(" - Cache hit rate: {:.1}%", stats.cache_hit_rate() * 100.0);
- if let Some(avg_time) = stats.avg_query_time() {
- println!(" - Avg query time: {:?}", avg_time);
- }
- println!(" - Session age: {:?}", session.age());
- println!();
-
- // 9. Cleanup
- println!("Step 9: Cleanup...");
- engine.remove(&doc1_id).await?;
- engine.remove(&doc2_id).await?;
- engine.remove(&doc3_id).await?;
- println!(" ✓ Documents removed\n");
-
- println!("=== Example Complete ===");
- Ok(())
-}
diff --git a/examples/rust/storage_backend.rs b/examples/rust/storage_backend.rs
deleted file mode 100644
index a239013c..00000000
--- a/examples/rust/storage_backend.rs
+++ /dev/null
@@ -1,140 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Custom storage backend example.
-//!
-//! This example shows how to implement a custom StorageBackend.
-//! Useful for integrating with databases, cloud storage, etc.
-//!
-//! # Usage
-//!
-//! ```bash
-//! cargo run --example storage_backend
-//! ```
-
-use std::collections::HashMap;
-use std::sync::{Arc, RwLock};
-
-use vectorless::Result;
-use vectorless::document::DocumentTree;
-use vectorless::storage::{DocumentMeta, PersistedDocument, StorageBackend, Workspace};
-
-/// A simple in-memory backend with logging.
-///
-/// This demonstrates how to implement StorageBackend trait.
-/// In production, you might implement S3, PostgreSQL, Redis, etc.
-#[derive(Debug)]
-struct LoggingMemoryBackend {
- name: &'static str,
- data: RwLock<HashMap<String, Vec<u8>>>,
-}
-
-impl LoggingMemoryBackend {
- fn new(name: &'static str) -> Self {
- Self {
- name,
- data: RwLock::new(HashMap::new()),
- }
- }
-}
-
-impl StorageBackend for LoggingMemoryBackend {
- fn get(&self, key: &str) -> Result