From b54692c200f2ec80969f039fedd94d8b9598cffe Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 10:46:18 +0800 Subject: [PATCH 01/21] docs: add pull request template Add a standard pull request template to improve contribution workflow. The template includes sections for summary, changes, checklist, and notes to guide contributors in creating well-structured pull requests. --- .github/PULL_REQUEST_TEMPLATE.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..2337d0f0 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,21 @@ +## Summary + + + +## Changes + + + +- + +## Checklist + +- [ ] Code compiles (`cargo build`) +- [ ] Tests pass (`cargo test --lib --all-features`) +- [ ] No new clippy warnings (`cargo clippy --all-features`) +- [ ] Public APIs have documentation comments +- [ ] Python bindings updated (if Rust API changed) + +## Notes + + From d953224d9ed9e208e15e237efdb0d250ee205c20 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 10:54:55 +0800 Subject: [PATCH 02/21] docs(lib): update crate description to reflect LLM-guided approach Update the main library documentation to better describe the document engine as LLM-guided from indexing to querying, emphasizing the hierarchical semantic tree approach without vector databases or similarity search. --- rust/src/lib.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 7541d900..3b06cb31 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -4,11 +4,9 @@ //! # Vectorless //! -//! An ultra-performant reasoning-native document intelligence engine for AI. -//! -//! It transforms documents into rich semantic trees and uses LLMs to -//! 
intelligently traverse the hierarchy — retrieving the most relevant content -//! through structural reasoning and deep contextual understanding. +//! A document engine for AI. It transforms documents into hierarchical semantic +//! trees and uses the LLM itself to navigate and retrieve — purely LLM-guided, +//! from indexing to querying. No vector databases, no embeddings, no similarity search. //! //! ## Quick Start //! From ecedd8810aae2842dc62f5a345a7fa0baab2200b Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 11:11:45 +0800 Subject: [PATCH 03/21] feat(rust): add rustfmt config and improve client builder - Add .rustfmt.toml with proper formatting configuration including import granularity, grouping, and max width settings - Simplify imports in client/builder.rs using unified import syntax - Update EngineBuilder documentation with clearer examples - Make endpoint requirement explicit in builder documentation - Remove unused ConfigLoader import from client builder --- .rustfmt.toml | 25 ++++++++++ rust/src/client/builder.rs | 100 ++++++++----------------------------- 2 files changed, 45 insertions(+), 80 deletions(-) create mode 100644 .rustfmt.toml diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 00000000..c39ea46c --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,25 @@ +# .rustfmt.toml + +# Merge imports from the same crate +imports_granularity = "Crate" + +# Group imports: stdlib / external crates / current crate +group_imports = "StdExternalCrate" + +# Try horizontal layout first, fall back to vertical if too long +imports_layout = "HorizontalVertical" + +# Sort import items +reorder_imports = true + +# Use the new edition formatting style +edition = "2021" + +# Maximum line width +max_width = 100 + +# Indent style for imports (keep default) +imports_indent = "Block" + +# Maximum number of blank lines (keep default) +blank_lines_upper_bound = 1 \ No newline at end of file diff --git a/rust/src/client/builder.rs 
b/rust/src/client/builder.rs index 7fea2913..e2041644 100644 --- a/rust/src/client/builder.rs +++ b/rust/src/client/builder.rs @@ -5,83 +5,38 @@ //! //! This module provides [`EngineBuilder`] for configuring and building //! [`Engine`] instances with sensible defaults. -//! -//! # Configuration -//! -//! `api_key` and `model` are **required**. `endpoint` is optional -//! (defaults to the model provider's standard endpoint). -//! -//! Configuration sources (later overrides earlier): -//! 1. Default configuration -//! 2. Config file (via `with_config_path`) -//! 3. Builder methods (`with_key`, `with_model`, etc.) — highest priority -//! -//! # Examples -//! -//! ```rust,no_run -//! use vectorless::client::EngineBuilder; -//! -//! # #[tokio::main] -//! # async fn main() -> Result<(), vectorless::BuildError> { -//! let engine = EngineBuilder::new() -//! .with_key("sk-...") -//! .with_model("gpt-4o") -//! .build() -//! .await?; -//! # Ok(()) -//! # } -//! ``` -//! -//! ## With Custom Endpoint -//! -//! ```rust,no_run -//! use vectorless::client::EngineBuilder; -//! -//! # #[tokio::main] -//! # async fn main() -> Result<(), vectorless::BuildError> { -//! let engine = EngineBuilder::new() -//! .with_key("sk-...") -//! .with_model("deepseek-chat") -//! .with_endpoint("https://api.deepseek.com/v1") -//! .build() -//! .await?; -//! # Ok(()) -//! # } -//! ``` - -use crate::config::{Config, ConfigLoader, RetrievalConfig}; -use crate::memo::MemoStore; -use crate::retrieval::PipelineRetriever; -use crate::storage::Workspace; - -use super::engine::Engine; -use crate::events::EventEmitter; + +use crate::{ + config::{Config, RetrievalConfig}, + events::EventEmitter, + memo::MemoStore, + retrieval::PipelineRetriever, + storage::Workspace, + client::engine::Engine, +}; /// Builder for creating a [`Engine`] client. /// -/// `api_key` and `model` are required and must be set via builder methods -/// or provided through a config file. 
+/// `api_key`, `model` and `endpoint` are **required**. /// /// # Example /// /// ```rust,no_run /// use vectorless::client::EngineBuilder; /// -/// # #[tokio::main] -/// # async fn main() -> Result<(), vectorless::BuildError> { -/// let client = EngineBuilder::new() -/// .with_key("sk-...") -/// .with_model("gpt-4o") -/// .build() -/// .await?; -/// # Ok(()) -/// # } +/// #[tokio::main] +/// async fn main() -> Result<(), vectorless::BuildError> { +/// let client = EngineBuilder::new() +/// .with_key("sk-...") +/// .with_model("gpt-4o") +/// .with_endpoint("https://api.xxx.com/v1") +/// .build() +/// .await?; +/// Ok(()) +/// } /// ``` #[derive(Debug)] pub struct EngineBuilder { - /// Configuration file path. - config_path: Option, - /// Custom configuration. config: Option, @@ -118,7 +73,6 @@ impl EngineBuilder { #[must_use] pub fn new() -> Self { Self { - config_path: None, config: None, retrieval_config: None, events: None, @@ -136,15 +90,6 @@ impl EngineBuilder { // Basic Configuration // ============================================================ - /// Set the configuration file path. - /// - /// The file must be a valid TOML configuration. No auto-detection is performed. - #[must_use] - pub fn with_config_path(mut self, path: impl Into) -> Self { - self.config_path = Some(path.into()); - self - } - /// Set a custom configuration object. /// /// This overrides any config file settings. @@ -350,11 +295,6 @@ impl EngineBuilder { // Load or create configuration let mut config = if let Some(config) = self.config { config - } else if let Some(path) = self.config_path { - ConfigLoader::new() - .file(&path) - .load() - .map_err(|e| BuildError::Config(e.to_string()))? 
} else { Config::default() }; From 4b95661e024316324ed98458885d06e4890bf733 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 11:16:59 +0800 Subject: [PATCH 04/21] refactor(rust): remove unused config field from EngineBuilder - Remove the `config` field from EngineBuilder struct as it was unused - Remove the `with_config` method since it's no longer needed - Simplify the build process by directly using Config::default() - Remove Config-related error variant from BuildError enum - Update all related code to work without custom config field --- rust/src/client/builder.rs | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs index e2041644..22a55554 100644 --- a/rust/src/client/builder.rs +++ b/rust/src/client/builder.rs @@ -37,9 +37,6 @@ use crate::{ /// ``` #[derive(Debug)] pub struct EngineBuilder { - /// Custom configuration. - config: Option, - /// Custom retrieval config. retrieval_config: Option, @@ -73,7 +70,6 @@ impl EngineBuilder { #[must_use] pub fn new() -> Self { Self { - config: None, retrieval_config: None, events: None, api_key: None, @@ -90,15 +86,6 @@ impl EngineBuilder { // Basic Configuration // ============================================================ - /// Set a custom configuration object. - /// - /// This overrides any config file settings. - #[must_use] - pub fn with_config(mut self, config: Config) -> Self { - self.config = Some(config); - self - } - /// Set custom retrieval configuration. 
#[must_use] pub fn with_retrieval_config(mut self, config: RetrievalConfig) -> Self { @@ -292,12 +279,8 @@ impl EngineBuilder { /// # } /// ``` pub async fn build(self) -> Result { - // Load or create configuration - let mut config = if let Some(config) = self.config { - config - } else { - Config::default() - }; + // Load default configuration + let mut config = Config::default(); // Apply builder overrides to retrieval config if let Some(retrieval_config) = self.retrieval_config { @@ -422,10 +405,6 @@ impl Default for EngineBuilder { /// Error during client build. #[derive(Debug, thiserror::Error)] pub enum BuildError { - /// Configuration error. - #[error("Configuration error: {0}")] - Config(String), - /// Workspace error. #[error("Workspace error: {0}")] Workspace(String), From 8d5f97673dc1924786743edb5a6f90318b64b1c3 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 11:19:52 +0800 Subject: [PATCH 05/21] refactor(rust): remove unused retrieval config from EngineBuilder - Remove unused RetrievalConfig import and field from EngineBuilder - Delete with_retrieval_config method that was no longer needed - Clean up related configuration application logic in build method --- rust/src/client/builder.rs | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs index 22a55554..8ba50086 100644 --- a/rust/src/client/builder.rs +++ b/rust/src/client/builder.rs @@ -7,7 +7,7 @@ //! [`Engine`] instances with sensible defaults. use crate::{ - config::{Config, RetrievalConfig}, + config::Config, events::EventEmitter, memo::MemoStore, retrieval::PipelineRetriever, @@ -37,9 +37,6 @@ use crate::{ /// ``` #[derive(Debug)] pub struct EngineBuilder { - /// Custom retrieval config. - retrieval_config: Option, - /// Event emitter. 
events: Option, @@ -70,7 +67,6 @@ impl EngineBuilder { #[must_use] pub fn new() -> Self { Self { - retrieval_config: None, events: None, api_key: None, model: None, @@ -86,13 +82,6 @@ impl EngineBuilder { // Basic Configuration // ============================================================ - /// Set custom retrieval configuration. - #[must_use] - pub fn with_retrieval_config(mut self, config: RetrievalConfig) -> Self { - self.retrieval_config = Some(config); - self - } - /// Set the event emitter for callbacks. #[must_use] pub fn with_events(mut self, events: EventEmitter) -> Self { @@ -282,11 +271,6 @@ impl EngineBuilder { // Load default configuration let mut config = Config::default(); - // Apply builder overrides to retrieval config - if let Some(retrieval_config) = self.retrieval_config { - config.retrieval = retrieval_config; - } - // Apply individual overrides to LlmPoolConfig (primary) + legacy config (compat) if let Some(api_key) = self.api_key { config.llm.api_key = Some(api_key.clone()); From bca84f1a782669d5fba546e3a13cf9a30ae1900f Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 11:31:33 +0800 Subject: [PATCH 06/21] refactor(rust): remove unused imports and simplify builder configuration - Remove unused imports including MemoStore, reorder imports following crate granularity rules - Remove top_k, fast_mode, precise_mode, and memo_store fields from EngineBuilder as they are no longer needed - Remove related methods with_top_k, fast, precise, and with_memo_store - Clean up build logic to remove configuration of removed features - Delete associated test cases for removed functionality - Remove rustfmt.toml file and advanced example that are no longer used --- .rustfmt.toml | 25 ------- rust/examples/advanced.rs | 78 -------------------- rust/src/client/builder.rs | 147 +------------------------------------ 3 files changed, 1 insertion(+), 249 deletions(-) delete mode 100644 .rustfmt.toml delete mode 100644 
rust/examples/advanced.rs diff --git a/.rustfmt.toml b/.rustfmt.toml deleted file mode 100644 index c39ea46c..00000000 --- a/.rustfmt.toml +++ /dev/null @@ -1,25 +0,0 @@ -# .rustfmt.toml - -# Merge imports from the same crate -imports_granularity = "Crate" - -# Group imports: stdlib / external crates / current crate -group_imports = "StdExternalCrate" - -# Try horizontal layout first, fall back to vertical if too long -imports_layout = "HorizontalVertical" - -# Sort import items -reorder_imports = true - -# Use the new edition formatting style -edition = "2021" - -# Maximum line width -max_width = 100 - -# Indent style for imports (keep default) -imports_indent = "Block" - -# Maximum number of blank lines (keep default) -blank_lines_upper_bound = 1 \ No newline at end of file diff --git a/rust/examples/advanced.rs b/rust/examples/advanced.rs deleted file mode 100644 index fa14e931..00000000 --- a/rust/examples/advanced.rs +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (c) 2026 vectorless developers -// SPDX-License-Identifier: Apache-2.0 - -//! Advanced usage example - Full Configuration. -//! -//! This example demonstrates how to use a configuration file -//! for advanced use cases where you need fine-grained control. -//! -//! # Usage -//! -//! ```bash -//! # Using environment variables for LLM config (overrides config file): -//! LLM_API_KEY=sk-xxx LLM_MODEL=gpt-4o cargo run --example advanced -//! -//! # Or with defaults (using config file): -//! cargo run --example advanced -//! ``` - -use vectorless::{EngineBuilder, IndexContext, QueryContext}; - -#[tokio::main] -async fn main() -> vectorless::Result<()> { - // Initialize tracing for debug output (set RUST_LOG=debug to see more) - tracing_subscriber::fmt::init(); - - println!("=== Vectorless Advanced Example (Config File) ===\n"); - - // Load all settings from the specified config file. - // The config file must include api_key and model. 
- // If environment variables are set, they override the config file values. - let mut builder = EngineBuilder::new().with_config_path("./config.toml"); - - // Override config with env vars if present - if let Ok(api_key) = std::env::var("LLM_API_KEY") { - builder = builder.with_key(&api_key); - } - if let Ok(model) = std::env::var("LLM_MODEL") { - builder = builder.with_model(&model); - } - if let Ok(endpoint) = std::env::var("LLM_ENDPOINT") { - builder = builder.with_endpoint(&endpoint); - } - - let client = builder - .build() - .await - .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?; - - println!("Client created with config file\n"); - - // Index a document - let result = client.index(IndexContext::from_path("./README.md")).await?; - let doc_id = result.doc_id().unwrap().to_string(); - println!("Indexed: {}\n", doc_id); - - // Query - let result = client - .query( - QueryContext::new("What features does Vectorless provide?") - .with_doc_ids(vec![doc_id.clone()]), - ) - .await?; - println!("Query: What features does Vectorless provide?"); - if let Some(item) = result.single() { - println!("Score: {:.2}", item.score); - if !item.content.is_empty() { - let preview: String = item.content.chars().take(200).collect(); - println!("Result: {}...\n", preview); - } - } - - // Cleanup - client.remove(&doc_id).await?; - println!("Cleaned up"); - - println!("\n=== Done ==="); - Ok(()) -} diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs index 8ba50086..667071bb 100644 --- a/rust/src/client/builder.rs +++ b/rust/src/client/builder.rs @@ -7,12 +7,8 @@ //! [`Engine`] instances with sensible defaults. use crate::{ - config::Config, - events::EventEmitter, - memo::MemoStore, - retrieval::PipelineRetriever, + client::engine::Engine, config::Config, events::EventEmitter, retrieval::PipelineRetriever, storage::Workspace, - client::engine::Engine, }; /// Builder for creating a [`Engine`] client. 
@@ -48,20 +44,7 @@ pub struct EngineBuilder { /// LLM endpoint URL (override). endpoint: Option, - - /// Top-K for retrieval (override). - top_k: Option, - - /// Fast mode flag. - fast_mode: bool, - - /// Precise mode flag. - precise_mode: bool, - - /// Memo store for caching LLM decisions. - memo_store: Option, } - impl EngineBuilder { /// Create a new builder with defaults. #[must_use] @@ -71,10 +54,6 @@ impl EngineBuilder { api_key: None, model: None, endpoint: None, - top_k: None, - fast_mode: false, - precise_mode: false, - memo_store: None, } } @@ -89,40 +68,6 @@ impl EngineBuilder { self } - /// Set a memo store for caching LLM decisions. - /// - /// When enabled, the pilot will cache navigation decisions based on - /// context fingerprints, avoiding redundant API calls for similar - /// navigation scenarios. - /// - /// # Example - /// - /// ```rust,no_run - /// use vectorless::client::EngineBuilder; - /// use vectorless::memo::MemoStore; - /// use chrono::Duration; - /// - /// # #[tokio::main] - /// # async fn main() -> Result<(), vectorless::BuildError> { - /// let memo_store = MemoStore::new() - /// .with_ttl(Duration::days(7)) - /// .with_model("gpt-4o"); - /// - /// let engine = EngineBuilder::new() - /// .with_key("sk-...") - /// .with_model("gpt-4o") - /// .with_memo_store(memo_store) - /// .build() - /// .await?; - /// # Ok(()) - /// # } - /// ``` - #[must_use] - pub fn with_memo_store(mut self, store: MemoStore) -> Self { - self.memo_store = Some(store); - self - } - // ============================================================ // LLM Configuration // ============================================================ @@ -202,45 +147,6 @@ impl EngineBuilder { // Retrieval Configuration // ============================================================ - /// Set the number of results to return from queries. - /// - /// Default is 5. Higher values return more context but cost more tokens. 
- #[must_use] - pub fn with_top_k(mut self, k: usize) -> Self { - self.top_k = Some(k); - self - } - - // ============================================================ - // Preset Configurations - // ============================================================ - - /// Enable fast mode for quicker but less thorough retrieval. - /// - /// Fast mode uses: - /// - Keyword-based retrieval (no LLM calls) - /// - Lower beam width / MCTS simulations - /// - Lazy summary generation - #[must_use] - pub fn fast(mut self) -> Self { - self.fast_mode = true; - self.precise_mode = false; - self - } - - /// Enable precise mode for higher quality retrieval. - /// - /// Precise mode uses: - /// - MCTS-based retrieval - /// - Higher simulation count - /// - Full summary generation - #[must_use] - pub fn precise(mut self) -> Self { - self.precise_mode = true; - self.fast_mode = false; - self - } - /// Build the Engine client. /// /// `api_key` and `model` must be provided via builder methods or config file. 
@@ -299,18 +205,6 @@ impl EngineBuilder { config.retrieval.endpoint = endpoint.clone(); config.summary.endpoint = endpoint; } - if let Some(top_k) = self.top_k { - config.retrieval.top_k = top_k; - } - - // Apply preset modes - if self.fast_mode { - config.retrieval.search.max_iterations = 5; - } - if self.precise_mode { - config.retrieval.search.max_iterations = 100; - } - // Validate required settings let resolved_key = config .llm @@ -363,15 +257,6 @@ impl EngineBuilder { retriever.with_content_config(retrieval_config.content.to_aggregator_config()); } - // Add memo store if provided or create default - if let Some(memo_store) = self.memo_store { - retriever = retriever.with_memo_store(memo_store); - } else { - // Create default memo store with model from config - let memo_store = MemoStore::new().with_model(retrieval_model).with_version(1); - retriever = retriever.with_memo_store(memo_store); - } - // Build engine let events = self.events.unwrap_or_default(); Engine::with_components(config, workspace, retriever, indexer, events) @@ -410,13 +295,6 @@ pub enum BuildError { mod tests { use super::*; - #[test] - fn test_builder_defaults() { - let builder = EngineBuilder::new(); - assert!(!builder.fast_mode); - assert!(!builder.precise_mode); - } - #[test] fn test_builder_with_key() { let builder = EngineBuilder::new().with_key("sk-test-key"); @@ -440,27 +318,4 @@ mod tests { assert_eq!(builder.model, Some("gpt-4o-mini".to_string())); assert_eq!(builder.api_key, Some("sk-test".to_string())); } - - #[test] - fn test_builder_fast_mode() { - let builder = EngineBuilder::new().fast(); - - assert!(builder.fast_mode); - assert!(!builder.precise_mode); - } - - #[test] - fn test_builder_precise_mode() { - let builder = EngineBuilder::new().precise(); - - assert!(builder.precise_mode); - assert!(!builder.fast_mode); - } - - #[test] - fn test_builder_top_k() { - let builder = EngineBuilder::new().with_top_k(10); - - assert_eq!(builder.top_k, Some(10)); - } } From 
29f3cec075456b5c83b43371b052d8149ed400f4 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 11:41:16 +0800 Subject: [PATCH 07/21] feat(rust): add custom config option to EngineBuilder - Add optional Config field to EngineBuilder struct for advanced tuning - Implement with_config method to set custom configuration - Use provided config or default when building engine - Export Config struct from main library module The new configuration option allows users to provide custom settings for advanced parameter tuning while maintaining backward compatibility through builder methods that can still override individual fields. --- rust/src/client/builder.rs | 20 ++++++++++++++++++-- rust/src/lib.rs | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs index 667071bb..8dee7c37 100644 --- a/rust/src/client/builder.rs +++ b/rust/src/client/builder.rs @@ -33,6 +33,9 @@ use crate::{ /// ``` #[derive(Debug)] pub struct EngineBuilder { + /// Custom configuration for advanced tuning. + config: Option, + /// Event emitter. events: Option, @@ -45,11 +48,13 @@ pub struct EngineBuilder { /// LLM endpoint URL (override). endpoint: Option, } + impl EngineBuilder { /// Create a new builder with defaults. #[must_use] pub fn new() -> Self { Self { + config: None, events: None, api_key: None, model: None, @@ -61,6 +66,17 @@ impl EngineBuilder { // Basic Configuration // ============================================================ + /// Set a custom configuration for advanced tuning of internal parameters. + /// + /// When provided, this replaces the default [`Config`]. Builder methods + /// (`with_key`, `with_model`, `with_endpoint`) still override the + /// corresponding fields. + #[must_use] + pub fn with_config(mut self, config: Config) -> Self { + self.config = Some(config); + self + } + /// Set the event emitter for callbacks. 
#[must_use] pub fn with_events(mut self, events: EventEmitter) -> Self { @@ -174,8 +190,8 @@ impl EngineBuilder { /// # } /// ``` pub async fn build(self) -> Result { - // Load default configuration - let mut config = Config::default(); + // Load user-provided or default configuration + let mut config = self.config.unwrap_or_default(); // Apply individual overrides to LlmPoolConfig (primary) + legacy config (compat) if let Some(api_key) = self.api_key { diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 3b06cb31..a361b557 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -35,6 +35,7 @@ pub mod client; mod config; +pub use config::Config; pub mod document; pub mod error; pub mod events; From 4877e8848935a63881144be94f381ac4fa063b76 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 12:40:15 +0800 Subject: [PATCH 08/21] feat(python): add Python bindings for vectorless engine Add comprehensive Python wrapper classes for the vectorless engine, including Config, IndexContext, QueryContext, DocumentInfo, Engine, and DocumentGraph with full async support and proper error handling. 
The Python bindings provide complete access to the core functionality: - Engine class with async index/query operations - Config for advanced engine tuning - Context classes for indexing and querying - Document graph for cross-document relationships - Proper error handling with VectorlessError - Full async/await support using pyo3-async-runtimes --- python/src/config.rs | 86 ++ python/src/context.rs | 290 +++++++ python/src/document.rs | 59 ++ python/src/engine.rs | 242 ++++++ python/src/error.rs | 71 ++ python/src/graph.rs | 212 +++++ python/src/lib.rs | 1575 +----------------------------------- python/src/metrics.rs | 376 +++++++++ python/src/results.rs | 351 ++++++++ rust/src/client/builder.rs | 8 +- rust/src/config/mod.rs | 4 +- rust/src/lib.rs | 2 +- 12 files changed, 1712 insertions(+), 1564 deletions(-) create mode 100644 python/src/config.rs create mode 100644 python/src/context.rs create mode 100644 python/src/document.rs create mode 100644 python/src/engine.rs create mode 100644 python/src/error.rs create mode 100644 python/src/graph.rs create mode 100644 python/src/metrics.rs create mode 100644 python/src/results.rs diff --git a/python/src/config.rs b/python/src/config.rs new file mode 100644 index 00000000..93a0552e --- /dev/null +++ b/python/src/config.rs @@ -0,0 +1,86 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Config Python wrapper. + +use pyo3::prelude::*; + +/// Advanced configuration for Engine internals. +/// +/// Create a Config to customize storage, retrieval, concurrency, +/// and other engine parameters beyond the basic builder API. 
+/// +/// Example: +/// +/// ```python +/// from vectorless import Config, Engine +/// +/// config = Config() +/// config.set_workspace_dir("/data/vectorless") +/// config.set_top_k(10) +/// config.set_max_concurrent_requests(20) +/// +/// engine = Engine(api_key="sk-...", model="gpt-4o", config=config) +/// ``` +#[pyclass(name = "Config")] +pub struct PyConfig { + pub(crate) inner: vectorless::Config, +} + +#[pymethods] +impl PyConfig { + /// Create a new Config with defaults. + #[new] + fn new() -> Self { + Self { + inner: vectorless::Config::default(), + } + } + + /// Set the workspace directory for persisted documents. + /// + /// Default: ~/.vectorless + fn set_workspace_dir(&mut self, dir: &str) { + self.inner.storage.workspace_dir = std::path::PathBuf::from(dir); + } + + /// Set the number of top-k results to return from queries. + /// + /// Default: 3 + fn set_top_k(&mut self, k: usize) { + self.inner.retrieval.top_k = k; + } + + /// Set the maximum concurrent LLM API calls. + /// + /// Default: 10 + fn set_max_concurrent_requests(&mut self, max: usize) { + self.inner.concurrency.max_concurrent_requests = max; + } + + /// Set the rate limit (requests per minute). + /// + /// Default: 500 + fn set_requests_per_minute(&mut self, rpm: usize) { + self.inner.concurrency.requests_per_minute = rpm; + } + + /// Set the maximum iterations for retrieval search. + fn set_max_iterations(&mut self, max: usize) { + self.inner.retrieval.search.max_iterations = max; + } + + /// Set the retrieval temperature. + /// + /// Default: 0.0 + fn set_temperature(&mut self, temp: f32) { + self.inner.retrieval.temperature = temp; + } + + /// Enable or disable metrics collection. 
+ /// + /// Default: True + fn set_metrics_enabled(&mut self, enabled: bool) { + self.inner.metrics.enabled = enabled; + } +} diff --git a/python/src/context.rs b/python/src/context.rs new file mode 100644 index 00000000..4b005033 --- /dev/null +++ b/python/src/context.rs @@ -0,0 +1,290 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! IndexContext, QueryContext, and IndexOptions Python wrappers. + +use pyo3::prelude::*; + +use ::vectorless::client::{ + DocumentFormat, IndexContext, IndexMode, IndexOptions, QueryContext, +}; + +use super::error::VectorlessError; + +/// Parse format string to DocumentFormat. +fn parse_format(format: &str) -> PyResult { + match format.to_lowercase().as_str() { + "markdown" | "md" => Ok(DocumentFormat::Markdown), + "pdf" => Ok(DocumentFormat::Pdf), + _ => Err(PyErr::from(VectorlessError::new( + format!("Unknown format: {}. Supported: markdown, pdf", format), + "config", + ))), + } +} + +// ============================================================ +// IndexOptions +// ============================================================ + +/// Options for controlling indexing behavior. +/// +/// Args: +/// mode: Indexing mode - "default", "force", or "incremental". +/// generate_summaries: Whether to generate summaries. Default: True. +/// generate_description: Whether to generate document description. Default: False. +/// include_text: Whether to include node text in the tree. Default: True. +/// generate_ids: Whether to generate node IDs. Default: True. +/// enable_synonym_expansion: Whether to expand keywords with LLM-generated +/// synonyms during indexing. Improves recall for differently-worded queries. +/// Default: False. 
+#[pyclass(name = "IndexOptions", skip_from_py_object)] +#[derive(Clone)] +pub struct PyIndexOptions { + pub(crate) inner: IndexOptions, +} + +#[pymethods] +impl PyIndexOptions { + #[new] + #[pyo3(signature = (mode="default", generate_summaries=true, generate_description=false, include_text=true, generate_ids=true, enable_synonym_expansion=false))] + fn new( + mode: &str, + generate_summaries: bool, + generate_description: bool, + include_text: bool, + generate_ids: bool, + enable_synonym_expansion: bool, + ) -> PyResult { + let mut opts = IndexOptions::new(); + match mode { + "default" => {} + "force" => opts = opts.with_mode(IndexMode::Force), + "incremental" => opts = opts.with_mode(IndexMode::Incremental), + _ => { + return Err(PyErr::from(VectorlessError::new( + format!( + "Unknown mode: {}. Supported: default, force, incremental", + mode + ), + "config", + ))); + } + } + opts.generate_summaries = generate_summaries; + opts.generate_description = generate_description; + opts.include_text = include_text; + opts.generate_ids = generate_ids; + opts.enable_synonym_expansion = enable_synonym_expansion; + Ok(Self { inner: opts }) + } + + fn __repr__(&self) -> String { + format!( + "IndexOptions(mode='{}', generate_summaries={}, generate_description={}, include_text={}, generate_ids={}, enable_synonym_expansion={})", + match self.inner.mode { + IndexMode::Default => "default", + IndexMode::Force => "force", + IndexMode::Incremental => "incremental", + }, + self.inner.generate_summaries, + self.inner.generate_description, + self.inner.include_text, + self.inner.generate_ids, + self.inner.enable_synonym_expansion, + ) + } +} + +// ============================================================ +// IndexContext +// ============================================================ + +/// Context for indexing a document. 
+/// +/// Create using the static methods: +/// +/// ```python +/// from vectorless import IndexContext +/// +/// # Single file +/// ctx = IndexContext.from_path("./document.pdf") +/// +/// # Multiple files +/// ctx = IndexContext.from_paths(["./a.pdf", "./b.md"]) +/// +/// # Directory +/// ctx = IndexContext.from_dir("./docs/") +/// +/// # From text +/// ctx = IndexContext.from_content("# Title\nContent...", "markdown").with_name("doc") +/// +/// # From bytes +/// ctx = IndexContext.from_bytes(data, "pdf").with_name("doc") +/// ``` +#[pyclass(name = "IndexContext")] +pub struct PyIndexContext { + pub(crate) inner: IndexContext, +} + +#[pymethods] +impl PyIndexContext { + /// Create an IndexContext from a single file path. + #[staticmethod] + fn from_path(path: String) -> Self { + Self { + inner: IndexContext::from_path(&path), + } + } + + /// Create an IndexContext from multiple file paths. + #[staticmethod] + fn from_paths(paths: Vec) -> Self { + Self { + inner: IndexContext::from_paths(&paths), + } + } + + /// Create an IndexContext from all supported files in a directory. + /// + /// Args: + /// path: Directory path to scan. + /// recursive: If True, scan subdirectories recursively. Default: False. + #[staticmethod] + #[pyo3(signature = (path, recursive=false))] + fn from_dir(path: String, recursive: bool) -> Self { + let inner = IndexContext::from_dir(&path, recursive); + Self { inner } + } + + /// Create an IndexContext from text content. + #[staticmethod] + #[pyo3(signature = (content, format="markdown"))] + fn from_content(content: String, format: &str) -> PyResult { + let doc_format = parse_format(format)?; + let ctx = IndexContext::from_content(&content, doc_format); + Ok(Self { inner: ctx }) + } + + /// Create an IndexContext from binary data.
+ #[staticmethod] + fn from_bytes(data: Vec, format: &str) -> PyResult { + let doc_format = parse_format(format)?; + let ctx = IndexContext::from_bytes(data, doc_format); + Ok(Self { inner: ctx }) + } + + /// Set the document name (single-source only). + fn with_name(&self, name: String) -> Self { + let ctx = self.inner.clone().with_name(&name); + Self { inner: ctx } + } + + /// Apply indexing options. + fn with_options(&self, options: &PyIndexOptions) -> Self { + let ctx = self.inner.clone().with_options(options.inner.clone()); + Self { inner: ctx } + } + + /// Set indexing mode. + fn with_mode(&self, mode: &str) -> PyResult { + let m = match mode { + "default" => IndexMode::Default, + "force" => IndexMode::Force, + "incremental" => IndexMode::Incremental, + _ => { + return Err(PyErr::from(VectorlessError::new( + format!( + "Unknown mode: {}. Supported: default, force, incremental", + mode + ), + "config", + ))); + } + }; + let ctx = self.inner.clone().with_mode(m); + Ok(Self { inner: ctx }) + } + + /// Number of document sources. + fn __len__(&self) -> usize { + self.inner.len() + } + + /// Whether no sources are present. + fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + fn __repr__(&self) -> String { + format!("IndexContext(sources={})", self.inner.len()) + } +} + +// ============================================================ +// QueryContext +// ============================================================ + +/// Context for a query operation. 
+/// +/// ```python +/// from vectorless import QueryContext +/// +/// # Query specific documents +/// ctx = QueryContext("What is the total revenue?").with_doc_ids([doc_id]) +/// +/// # Query multiple documents +/// ctx = QueryContext("What is the architecture?").with_doc_ids(["doc-1", "doc-2"]) +/// +/// # Query entire workspace +/// ctx = QueryContext("Explain the algorithm") +/// ``` +#[pyclass(name = "QueryContext")] +pub struct PyQueryContext { + pub(crate) inner: QueryContext, +} + +#[pymethods] +impl PyQueryContext { + /// Create a new query context (defaults to workspace scope). + #[new] + fn new(query: String) -> Self { + Self { + inner: QueryContext::new(&query), + } + } + + /// Set scope to specific documents. + fn with_doc_ids(&self, doc_ids: Vec) -> Self { + let ctx = self.inner.clone().with_doc_ids(doc_ids); + Self { inner: ctx } + } + + /// Set scope to entire workspace. + fn with_workspace(&self) -> Self { + let ctx = self.inner.clone().with_workspace(); + Self { inner: ctx } + } + + /// Set the maximum tokens for the result content. + fn with_max_tokens(&self, tokens: usize) -> Self { + let ctx = self.inner.clone().with_max_tokens(tokens); + Self { inner: ctx } + } + + /// Set whether to include the reasoning chain. + fn with_include_reasoning(&self, include: bool) -> Self { + let ctx = self.inner.clone().with_include_reasoning(include); + Self { inner: ctx } + } + + /// Set the maximum tree traversal depth. + fn with_depth_limit(&self, depth: usize) -> Self { + let ctx = self.inner.clone().with_depth_limit(depth); + Self { inner: ctx } + } + + fn __repr__(&self) -> String { + "QueryContext(...)".to_string() + } +} diff --git a/python/src/document.rs b/python/src/document.rs new file mode 100644 index 00000000..eee70c0e --- /dev/null +++ b/python/src/document.rs @@ -0,0 +1,59 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! DocumentInfo Python wrapper. 
+ +use pyo3::prelude::*; + +use ::vectorless::client::DocumentInfo; + +/// Information about an indexed document. +#[pyclass(name = "DocumentInfo")] +pub struct PyDocumentInfo { + pub(crate) inner: DocumentInfo, +} + +#[pymethods] +impl PyDocumentInfo { + #[getter] + fn id(&self) -> &str { + &self.inner.id + } + + #[getter] + fn name(&self) -> &str { + &self.inner.name + } + + #[getter] + fn format(&self) -> &str { + &self.inner.format + } + + #[getter] + fn description(&self) -> Option<&str> { + self.inner.description.as_deref() + } + + #[getter] + fn source_path(&self) -> Option<&str> { + self.inner.source_path.as_deref() + } + + #[getter] + fn page_count(&self) -> Option { + self.inner.page_count + } + + #[getter] + fn line_count(&self) -> Option { + self.inner.line_count + } + + fn __repr__(&self) -> String { + format!( + "DocumentInfo(id='{}', name='{}', format='{}')", + self.inner.id, self.inner.name, self.inner.format + ) + } +} diff --git a/python/src/engine.rs b/python/src/engine.rs new file mode 100644 index 00000000..8f7dc015 --- /dev/null +++ b/python/src/engine.rs @@ -0,0 +1,242 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Engine Python wrapper and async helpers. 
+ +use pyo3::prelude::*; +use pyo3_async_runtimes::tokio::future_into_py; +use std::sync::Arc; +use tokio::runtime::Runtime; + +use ::vectorless::client::{Engine, EngineBuilder, IndexContext, QueryContext}; + +use super::config::PyConfig; +use super::context::{PyIndexContext, PyQueryContext}; +use super::document::PyDocumentInfo; +use super::error::VectorlessError; +use super::error::to_py_err; +use super::graph::PyDocumentGraph; +use super::metrics::PyMetricsReport; +use super::results::{PyIndexResult, PyQueryResult}; + +// ============================================================ +// Engine async helpers (named functions to avoid FnOnce HRTB issue) +// ============================================================ + +async fn run_index(engine: Arc, ctx: IndexContext) -> PyResult { + let result = engine.index(ctx).await.map_err(to_py_err)?; + Ok(PyIndexResult { inner: result }) +} + +async fn run_query(engine: Arc, ctx: QueryContext) -> PyResult { + let result = engine.query(ctx).await.map_err(to_py_err)?; + Ok(PyQueryResult { inner: result }) +} + +async fn run_list(engine: Arc) -> PyResult> { + let docs = engine.list().await.map_err(to_py_err)?; + Ok(docs + .into_iter() + .map(|d| PyDocumentInfo { inner: d }) + .collect()) +} + +async fn run_remove(engine: Arc, doc_id: String) -> PyResult { + engine.remove(&doc_id).await.map_err(to_py_err) +} + +async fn run_clear(engine: Arc) -> PyResult { + engine.clear().await.map_err(to_py_err) +} + +async fn run_exists(engine: Arc, doc_id: String) -> PyResult { + engine.exists(&doc_id).await.map_err(to_py_err) +} + +async fn run_get_graph(engine: Arc) -> PyResult> { + let graph = engine.get_graph().await.map_err(to_py_err)?; + Ok(graph.map(|g| PyDocumentGraph { inner: g })) +} + +fn run_metrics_report(engine: Arc) -> PyMetricsReport { + PyMetricsReport { + inner: engine.metrics_report(), + } +} + +// ============================================================ +// Engine +// 
============================================================ + +/// The main vectorless engine. +/// +/// `api_key` and `model` are **required**. +/// +/// ```python +/// from vectorless import Engine, IndexContext, QueryContext +/// +/// engine = Engine( +/// api_key="sk-...", +/// model="gpt-4o", +/// ) +/// +/// # Index +/// result = await engine.index(IndexContext.from_path("./report.pdf")) +/// doc_id = result.doc_id +/// +/// # Query +/// answer = await engine.query(QueryContext("What is the revenue?").with_doc_ids([doc_id])) +/// print(answer.single().content) +/// ``` +#[pyclass(name = "Engine")] +pub struct PyEngine { + inner: Arc, +} + +#[pymethods] +impl PyEngine { + /// Create a new Engine. + /// + /// Args: + /// api_key: **Required**. LLM API key. + /// model: **Required**. LLM model name. + /// endpoint: Optional API endpoint. + /// config: Optional Config for advanced tuning. + /// + /// Raises: + /// VectorlessError: If engine creation fails. + #[new] + #[pyo3(signature = (api_key=None, model=None, endpoint=None, config=None))] + fn new( + api_key: Option, + model: Option, + endpoint: Option, + config: Option>, + ) -> PyResult { + let rt = Runtime::new().map_err(|e| { + PyErr::from(VectorlessError::new( + format!("Failed to create tokio runtime: {}", e), + "config", + )) + })?; + + let rust_config = config.map(|c| c.inner.clone()); + + let engine = rt.block_on(async { + let mut builder = EngineBuilder::new(); + + if let Some(config) = rust_config { + builder = builder.with_config(config); + } + + if let Some(m) = &model { + builder = builder.with_model(m); + } + if let Some(e) = &endpoint { + builder = builder.with_endpoint(e); + } + if let Some(key) = api_key { + builder = builder.with_key(key); + } + + builder.build().await + }); + + let engine = engine.map_err(|e| { + PyErr::from(VectorlessError::new( + format!("Failed to create engine: {}", e), + "config", + )) + })?; + + Ok(Self { + inner: Arc::new(engine), + }) + } + + /// Index a document. 
+ /// + /// Args: + /// ctx: IndexContext created from from_path, from_paths, from_dir, etc. + /// + /// Returns: + /// IndexResult with doc_id and items. + /// + /// Raises: + /// VectorlessError: If indexing fails. + fn index<'py>(&self, py: Python<'py>, ctx: &PyIndexContext) -> PyResult> { + let engine = Arc::clone(&self.inner); + let index_ctx = ctx.inner.clone(); + future_into_py(py, run_index(engine, index_ctx)) + } + + /// Query indexed documents. + /// + /// Args: + /// ctx: QueryContext with query text and scope. + /// + /// Returns: + /// QueryResult with answer and score. + /// + /// Raises: + /// VectorlessError: If query fails. + fn query<'py>(&self, py: Python<'py>, ctx: &PyQueryContext) -> PyResult> { + let engine = Arc::clone(&self.inner); + let query_ctx = ctx.inner.clone(); + future_into_py(py, run_query(engine, query_ctx)) + } + + /// List all indexed documents. + /// + /// Returns: + /// List of DocumentInfo objects. + fn list<'py>(&self, py: Python<'py>) -> PyResult> { + let engine = Arc::clone(&self.inner); + future_into_py(py, run_list(engine)) + } + + /// Remove a document by ID. + /// + /// Returns: + /// True if removed, False if not found. + fn remove<'py>(&self, py: Python<'py>, doc_id: String) -> PyResult> { + let engine = Arc::clone(&self.inner); + future_into_py(py, run_remove(engine, doc_id)) + } + + /// Remove all indexed documents. + /// + /// Returns: + /// Number of documents removed. + fn clear<'py>(&self, py: Python<'py>) -> PyResult> { + let engine = Arc::clone(&self.inner); + future_into_py(py, run_clear(engine)) + } + + /// Check if a document exists. + fn exists<'py>(&self, py: Python<'py>, doc_id: String) -> PyResult> { + let engine = Arc::clone(&self.inner); + future_into_py(py, run_exists(engine, doc_id)) + } + + /// Get the cross-document relationship graph. + /// + /// Returns: + /// DocumentGraph if any documents exist, else None. 
+ fn get_graph<'py>(&self, py: Python<'py>) -> PyResult> { + let engine = Arc::clone(&self.inner); + future_into_py(py, run_get_graph(engine)) + } + + /// Generate a complete metrics report. + /// + /// Returns: + /// MetricsReport with LLM, Pilot, and Retrieval metrics. + fn metrics_report(&self) -> PyMetricsReport { + run_metrics_report(Arc::clone(&self.inner)) + } + + fn __repr__(&self) -> String { + "Engine(...)".to_string() + } +} diff --git a/python/src/error.rs b/python/src/error.rs new file mode 100644 index 00000000..d128ce5a --- /dev/null +++ b/python/src/error.rs @@ -0,0 +1,71 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Python exception types and error conversion. + +use pyo3::exceptions::PyException; +use pyo3::prelude::*; + +use ::vectorless::error::Error as RustError; + +/// Python exception for vectorless errors. +#[pyclass(extends = PyException, subclass)] +pub struct VectorlessError { + message: String, + kind: String, +} + +#[pymethods] +impl VectorlessError { + #[new] + fn new_py(message: String, kind: String) -> Self { + Self { message, kind } + } + + #[getter] + fn message(&self) -> &str { + &self.message + } + + #[getter] + fn kind(&self) -> &str { + &self.kind + } + + fn __str__(&self) -> &str { + &self.message + } + + fn __repr__(&self) -> String { + format!("VectorlessError('{}', kind='{}')", self.message, self.kind) + } +} + +impl VectorlessError { + pub fn new(message: String, kind: &str) -> Self { + Self { + message, + kind: kind.to_string(), + } + } +} + +impl From for PyErr { + fn from(err: VectorlessError) -> PyErr { + PyErr::new::((err.message, err.kind)) + } +} + +/// Convert vectorless errors to Python exceptions. 
+pub fn to_py_err(e: RustError) -> PyErr { + let message = e.to_string(); + let kind = match &e { + RustError::DocumentNotFound(_) => "not_found", + RustError::Parse(_) => "parse", + RustError::Config(_) => "config", + RustError::Workspace(_) => "workspace", + RustError::Llm(_) => "llm", + _ => "unknown", + }; + VectorlessError::new(message, kind).into() +} diff --git a/python/src/graph.rs b/python/src/graph.rs new file mode 100644 index 00000000..1aacd47f --- /dev/null +++ b/python/src/graph.rs @@ -0,0 +1,212 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! DocumentGraph Python wrappers. + +use pyo3::prelude::*; + +use ::vectorless::graph::{ + DocumentGraph, DocumentGraphNode, EdgeEvidence, GraphEdge, WeightedKeyword, +}; + +/// A keyword with weight from document analysis. +#[pyclass(name = "WeightedKeyword")] +pub struct PyWeightedKeyword { + pub(crate) inner: WeightedKeyword, +} + +#[pymethods] +impl PyWeightedKeyword { + #[getter] + fn keyword(&self) -> &str { + &self.inner.keyword + } + + #[getter] + fn weight(&self) -> f32 { + self.inner.weight + } + + fn __repr__(&self) -> String { + format!( + "WeightedKeyword('{}', weight={:.2})", + self.inner.keyword, self.inner.weight + ) + } +} + +/// Evidence for a cross-document connection. +#[pyclass(name = "EdgeEvidence")] +pub struct PyEdgeEvidence { + pub(crate) inner: EdgeEvidence, +} + +#[pymethods] +impl PyEdgeEvidence { + /// Number of shared keywords. + #[getter] + fn shared_keyword_count(&self) -> usize { + self.inner.shared_keyword_count + } + + /// Jaccard similarity of keyword sets. + #[getter] + fn keyword_jaccard(&self) -> f32 { + self.inner.keyword_jaccard + } + + /// Shared keywords with weights. 
+ #[getter] + fn shared_keywords(&self) -> Vec<(String, f32, f32)> { + self.inner + .shared_keywords + .iter() + .map(|sk| (sk.keyword.clone(), sk.source_weight, sk.target_weight)) + .collect() + } + + fn __repr__(&self) -> String { + format!( + "EdgeEvidence(shared={}, jaccard={:.2})", + self.inner.shared_keyword_count, self.inner.keyword_jaccard + ) + } +} + +/// An edge representing a relationship between two documents. +#[pyclass(name = "GraphEdge")] +pub struct PyGraphEdge { + pub(crate) inner: GraphEdge, +} + +#[pymethods] +impl PyGraphEdge { + /// Target document ID. + #[getter] + fn target_doc_id(&self) -> &str { + &self.inner.target_doc_id + } + + /// Edge weight (connection strength). + #[getter] + fn weight(&self) -> f32 { + self.inner.weight + } + + /// Evidence for this connection. + #[getter] + fn evidence(&self) -> PyEdgeEvidence { + PyEdgeEvidence { + inner: self.inner.evidence.clone(), + } + } + + fn __repr__(&self) -> String { + format!( + "GraphEdge(target='{}', weight={:.2})", + self.inner.target_doc_id, self.inner.weight + ) + } +} + +/// A node in the document graph representing an indexed document. +#[pyclass(name = "DocumentGraphNode")] +pub struct PyDocumentGraphNode { + pub(crate) inner: DocumentGraphNode, +} + +#[pymethods] +impl PyDocumentGraphNode { + #[getter] + fn doc_id(&self) -> &str { + &self.inner.doc_id + } + + #[getter] + fn title(&self) -> &str { + &self.inner.title + } + + #[getter] + fn format(&self) -> &str { + &self.inner.format + } + + #[getter] + fn node_count(&self) -> usize { + self.inner.node_count + } + + /// Top keywords extracted from the document. + #[getter] + fn top_keywords(&self) -> Vec { + self.inner + .top_keywords + .iter() + .map(|kw| PyWeightedKeyword { inner: kw.clone() }) + .collect() + } + + fn __repr__(&self) -> String { + format!( + "DocumentGraphNode(doc_id='{}', title='{}')", + self.inner.doc_id, self.inner.title + ) + } +} + +/// Cross-document relationship graph. 
+/// +/// Automatically rebuilt after indexing. Connects documents +/// that share keywords via Jaccard similarity. +#[pyclass(name = "DocumentGraph")] +pub struct PyDocumentGraph { + pub(crate) inner: DocumentGraph, +} + +#[pymethods] +impl PyDocumentGraph { + /// Number of document nodes. + fn node_count(&self) -> usize { + self.inner.node_count() + } + + /// Number of relationship edges. + fn edge_count(&self) -> usize { + self.inner.edge_count() + } + + /// Get a document node by ID. + fn get_node(&self, doc_id: String) -> Option { + self.inner + .get_node(&doc_id) + .map(|n| PyDocumentGraphNode { inner: n.clone() }) + } + + /// Get all document IDs in the graph. + fn doc_ids(&self) -> Vec { + self.inner.doc_ids().map(|s| s.to_string()).collect() + } + + /// Get edges (neighbors) for a document. + fn get_neighbors(&self, doc_id: String) -> Vec { + self.inner + .get_neighbors(&doc_id) + .iter() + .map(|e| PyGraphEdge { inner: e.clone() }) + .collect() + } + + /// Whether the graph is empty. + fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + fn __repr__(&self) -> String { + format!( + "DocumentGraph(nodes={}, edges={})", + self.inner.node_count(), + self.inner.edge_count() + ) + } +} diff --git a/python/src/lib.rs b/python/src/lib.rs index f6ff36ee..c3e71c59 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -3,1565 +3,25 @@ //! Python bindings for vectorless. 
-use pyo3::exceptions::PyException; use pyo3::prelude::*; -use pyo3_async_runtimes::tokio::future_into_py; -use std::sync::Arc; -use tokio::runtime::Runtime; -use ::vectorless::client::{ - DocumentFormat, DocumentInfo, Engine, EngineBuilder, FailedItem, IndexContext, IndexItem, - IndexMode, IndexOptions, IndexResult, QueryContext, QueryResult, QueryResultItem, -}; -use ::vectorless::error::Error as RustError; -use ::vectorless::metrics::IndexMetrics; -use ::vectorless::metrics::{ - LlmMetricsReport, MetricsReport, PilotMetricsReport, RetrievalMetricsReport, -}; - -// ============================================================ -// Error Types -// ============================================================ - -/// Python exception for vectorless errors. -#[pyclass(extends = PyException, subclass)] -pub struct VectorlessError { - message: String, - kind: String, -} - -#[pymethods] -impl VectorlessError { - #[new] - fn new_py(message: String, kind: String) -> Self { - Self { message, kind } - } - - #[getter] - fn message(&self) -> &str { - &self.message - } - - #[getter] - fn kind(&self) -> &str { - &self.kind - } - - fn __str__(&self) -> &str { - &self.message - } - - fn __repr__(&self) -> String { - format!("VectorlessError('{}', kind='{}')", self.message, self.kind) - } -} - -impl VectorlessError { - fn new(message: String, kind: &str) -> Self { - Self { - message, - kind: kind.to_string(), - } - } -} - -impl From for PyErr { - fn from(err: VectorlessError) -> PyErr { - PyErr::new::((err.message, err.kind)) - } -} - -/// Convert vectorless errors to Python exceptions. -fn to_py_err(e: RustError) -> PyErr { - let message = e.to_string(); - let kind = match &e { - RustError::DocumentNotFound(_) => "not_found", - RustError::Parse(_) => "parse", - RustError::Config(_) => "config", - RustError::Workspace(_) => "workspace", - RustError::Llm(_) => "llm", - _ => "unknown", - }; - VectorlessError::new(message, kind).into() -} - -/// Parse format string to DocumentFormat. 
-fn parse_format(format: &str) -> PyResult { - match format.to_lowercase().as_str() { - "markdown" | "md" => Ok(DocumentFormat::Markdown), - "pdf" => Ok(DocumentFormat::Pdf), - _ => Err(PyErr::from(VectorlessError::new( - format!("Unknown format: {}. Supported: markdown, pdf", format), - "config", - ))), - } -} - -// ============================================================ -// IndexOptions -// ============================================================ - -/// Options for controlling indexing behavior. -/// -/// Args: -/// mode: Indexing mode - "default", "force", or "incremental". -/// generate_summaries: Whether to generate summaries. Default: True. -/// generate_description: Whether to generate document description. Default: False. -/// include_text: Whether to include node text in the tree. Default: True. -/// generate_ids: Whether to generate node IDs. Default: True. -/// enable_synonym_expansion: Whether to expand keywords with LLM-generated -/// synonyms during indexing. Improves recall for differently-worded queries. -/// Default: False. -#[pyclass(name = "IndexOptions", skip_from_py_object)] -#[derive(Clone)] -pub struct PyIndexOptions { - inner: IndexOptions, -} - -#[pymethods] -impl PyIndexOptions { - #[new] - #[pyo3(signature = (mode="default", generate_summaries=true, generate_description=false, include_text=true, generate_ids=true, enable_synonym_expansion=false))] - fn new( - mode: &str, - generate_summaries: bool, - generate_description: bool, - include_text: bool, - generate_ids: bool, - enable_synonym_expansion: bool, - ) -> PyResult { - let mut opts = IndexOptions::new(); - match mode { - "default" => {} - "force" => opts = opts.with_mode(IndexMode::Force), - "incremental" => opts = opts.with_mode(IndexMode::Incremental), - _ => { - return Err(PyErr::from(VectorlessError::new( - format!( - "Unknown mode: {}. 
Supported: default, force, incremental", - mode - ), - "config", - ))); - } - } - opts.generate_summaries = generate_summaries; - opts.generate_description = generate_description; - opts.include_text = include_text; - opts.generate_ids = generate_ids; - opts.enable_synonym_expansion = enable_synonym_expansion; - Ok(Self { inner: opts }) - } - - fn __repr__(&self) -> String { - format!( - "IndexOptions(mode='{}', generate_summaries={}, generate_description={}, include_text={}, generate_ids={}, enable_synonym_expansion={})", - match self.inner.mode { - IndexMode::Default => "default", - IndexMode::Force => "force", - IndexMode::Incremental => "incremental", - }, - self.inner.generate_summaries, - self.inner.generate_description, - self.inner.include_text, - self.inner.generate_ids, - self.inner.enable_synonym_expansion, - ) - } -} - -// ============================================================ -// IndexContext -// ============================================================ - -/// Context for indexing a document. -/// -/// Create using the static methods: -/// -/// ```python -/// from vectorless import IndexContext -/// -/// # Single file -/// ctx = IndexContext.from_path("./document.pdf") -/// -/// # Multiple files -/// ctx = IndexContext.from_paths(["./a.pdf", "./b.md"]) -/// -/// # Directory -/// ctx = IndexContext.from_dir("./docs/") -/// -/// # From text -/// ctx = IndexContext.from_content("# Title\\nContent...", "markdown").with_name("doc") -/// -/// # From bytes -/// ctx = IndexContext.from_bytes(data, "pdf").with_name("doc") -/// ``` -#[pyclass(name = "IndexContext")] -pub struct PyIndexContext { - inner: IndexContext, -} - -#[pymethods] -impl PyIndexContext { - /// Create an IndexContext from a single file path. - #[staticmethod] - fn from_path(path: String) -> Self { - Self { - inner: IndexContext::from_path(&path), - } - } - - /// Create an IndexContext from multiple file paths. 
- #[staticmethod] - fn from_paths(paths: Vec) -> Self { - Self { - inner: IndexContext::from_paths(&paths), - } - } - - /// Create an IndexContext from all supported files in a directory. - /// - /// Args: - /// path: Directory path to scan. - /// recursive: If True, scan subdirectories recursively. Default: False. - #[staticmethod] - #[pyo3(signature = (path, recursive=false))] - fn from_dir(path: String, recursive: bool) -> Self { - let inner = IndexContext::from_dir(&path, recursive); - Self { inner } - } - - /// Create an IndexContext from text content. - #[staticmethod] - #[pyo3(signature = (content, format="markdown"))] - fn from_content(content: String, format: &str) -> PyResult { - let doc_format = parse_format(format)?; - let ctx = IndexContext::from_content(&content, doc_format); - Ok(Self { inner: ctx }) - } - - /// Create an IndexContext from binary data. - #[staticmethod] - fn from_bytes(data: Vec, format: &str) -> PyResult { - let doc_format = parse_format(format)?; - let ctx = IndexContext::from_bytes(data, doc_format); - Ok(Self { inner: ctx }) - } - - /// Set the document name (single-source only). - fn with_name(&self, name: String) -> Self { - let ctx = self.inner.clone().with_name(&name); - Self { inner: ctx } - } - - /// Apply indexing options. - fn with_options(&self, options: &PyIndexOptions) -> Self { - let ctx = self.inner.clone().with_options(options.inner.clone()); - Self { inner: ctx } - } - - /// Set indexing mode. - fn with_mode(&self, mode: &str) -> PyResult { - let m = match mode { - "default" => IndexMode::Default, - "force" => IndexMode::Force, - "incremental" => IndexMode::Incremental, - _ => { - return Err(PyErr::from(VectorlessError::new( - format!( - "Unknown mode: {}. Supported: default, force, incremental", - mode - ), - "config", - ))); - } - }; - let ctx = self.inner.clone().with_mode(m); - Ok(Self { inner: ctx }) - } - - /// Number of document sources. 
- fn __len__(&self) -> usize { - self.inner.len() - } - - /// Whether no sources are present. - fn is_empty(&self) -> bool { - self.inner.is_empty() - } - - fn __repr__(&self) -> String { - format!("IndexContext(sources={})", self.inner.len()) - } -} - -// ============================================================ -// QueryContext -// ============================================================ - -/// Context for a query operation. -/// -/// ```python -/// from vectorless import QueryContext -/// -/// # Query specific documents -/// ctx = QueryContext("What is the total revenue?").with_doc_ids([doc_id]) -/// -/// # Query multiple documents -/// ctx = QueryContext("What is the architecture?").with_doc_ids(["doc-1", "doc-2"]) -/// -/// # Query entire workspace -/// ctx = QueryContext("Explain the algorithm") -/// ``` -#[pyclass(name = "QueryContext")] -pub struct PyQueryContext { - inner: QueryContext, -} - -#[pymethods] -impl PyQueryContext { - /// Create a new query context (defaults to workspace scope). - #[new] - fn new(query: String) -> Self { - Self { - inner: QueryContext::new(&query), - } - } - - /// Set scope to specific documents. - fn with_doc_ids(&self, doc_ids: Vec) -> Self { - let ctx = self.inner.clone().with_doc_ids(doc_ids); - Self { inner: ctx } - } - - /// Set scope to entire workspace. - fn with_workspace(&self) -> Self { - let ctx = self.inner.clone().with_workspace(); - Self { inner: ctx } - } - - /// Set the maximum tokens for the result content. - fn with_max_tokens(&self, tokens: usize) -> Self { - let ctx = self.inner.clone().with_max_tokens(tokens); - Self { inner: ctx } - } - - /// Set whether to include the reasoning chain. - fn with_include_reasoning(&self, include: bool) -> Self { - let ctx = self.inner.clone().with_include_reasoning(include); - Self { inner: ctx } - } - - /// Set the maximum tree traversal depth. 
- fn with_depth_limit(&self, depth: usize) -> Self { - let ctx = self.inner.clone().with_depth_limit(depth); - Self { inner: ctx } - } - - fn __repr__(&self) -> String { - "QueryContext(...)".to_string() - } -} - -// ============================================================ -// QueryResultItem -// ============================================================ - -/// A single document's query result. -#[pyclass(name = "QueryResultItem")] -pub struct PyQueryResultItem { - inner: QueryResultItem, -} - -#[pymethods] -impl PyQueryResultItem { - /// The document ID. - #[getter] - fn doc_id(&self) -> &str { - &self.inner.doc_id - } - - /// The retrieved content. - #[getter] - fn content(&self) -> &str { - &self.inner.content - } - - /// Relevance score (0.0 to 1.0). - #[getter] - fn score(&self) -> f32 { - self.inner.score - } - - /// Node IDs that matched. - #[getter] - fn node_ids(&self) -> Vec { - self.inner.node_ids.clone() - } - - fn __repr__(&self) -> String { - format!( - "QueryResultItem(doc_id='{}', score={:.2}, content_len={})", - self.inner.doc_id, - self.inner.score, - self.inner.content.len() - ) - } -} - -// ============================================================ -// FailedItem -// ============================================================ - -/// A failed item in a batch operation. -#[pyclass(name = "FailedItem")] -pub struct PyFailedItem { - inner: FailedItem, -} - -#[pymethods] -impl PyFailedItem { - /// Source description. - #[getter] - fn source(&self) -> &str { - &self.inner.source - } - - /// Error message. - #[getter] - fn error(&self) -> &str { - &self.inner.error - } - - fn __repr__(&self) -> String { - format!( - "FailedItem(source='{}', error='{}')", - self.inner.source, self.inner.error - ) - } -} - -// ============================================================ -// QueryResult -// ============================================================ - -/// Result of a document query. 
-#[pyclass(name = "QueryResult")] -pub struct PyQueryResult { - inner: QueryResult, -} - -#[pymethods] -impl PyQueryResult { - /// Result items (one per document). - #[getter] - fn items(&self) -> Vec { - self.inner - .items - .iter() - .map(|i| PyQueryResultItem { inner: i.clone() }) - .collect() - } - - /// Get the first (single-doc) result item. - fn single(&self) -> Option { - self.inner - .single() - .map(|i| PyQueryResultItem { inner: i.clone() }) - } - - /// Number of result items. - fn __len__(&self) -> usize { - self.inner.len() - } - - /// Whether any documents failed. - fn has_failures(&self) -> bool { - self.inner.has_failures() - } - - /// Failed items. - #[getter] - fn failed(&self) -> Vec { - self.inner - .failed - .iter() - .map(|f| PyFailedItem { inner: f.clone() }) - .collect() - } - - fn __repr__(&self) -> String { - format!( - "QueryResult(items={}, failed={})", - self.inner.len(), - self.inner.failed.len() - ) - } -} - -// ============================================================ -// IndexMetrics -// ============================================================ - -/// Indexing pipeline metrics. -#[pyclass(name = "IndexMetrics")] -pub struct PyIndexMetrics { - inner: IndexMetrics, -} - -#[pymethods] -impl PyIndexMetrics { - /// Total indexing time (ms). - #[getter] - fn total_time_ms(&self) -> u64 { - self.inner.total_time_ms() - } - - /// Parse stage duration (ms). - #[getter] - fn parse_time_ms(&self) -> u64 { - self.inner.parse_time_ms - } - - /// Build stage duration (ms). - #[getter] - fn build_time_ms(&self) -> u64 { - self.inner.build_time_ms - } - - /// Enhance (summary) stage duration (ms). - #[getter] - fn enhance_time_ms(&self) -> u64 { - self.inner.enhance_time_ms - } - - /// Number of nodes processed. - #[getter] - fn nodes_processed(&self) -> usize { - self.inner.nodes_processed - } - - /// Number of summaries successfully generated. 
- #[getter] - fn summaries_generated(&self) -> usize { - self.inner.summaries_generated - } - - /// Number of summaries that failed to generate. - #[getter] - fn summaries_failed(&self) -> usize { - self.inner.summaries_failed - } - - /// Number of LLM calls made. - #[getter] - fn llm_calls(&self) -> usize { - self.inner.llm_calls - } - - /// Total tokens generated by LLM. - #[getter] - fn total_tokens_generated(&self) -> usize { - self.inner.total_tokens_generated - } - - /// Number of topics in reasoning index. - #[getter] - fn topics_indexed(&self) -> usize { - self.inner.topics_indexed - } - - /// Number of keywords in reasoning index. - #[getter] - fn keywords_indexed(&self) -> usize { - self.inner.keywords_indexed - } - - fn __repr__(&self) -> String { - format!( - "IndexMetrics(total={}ms, summaries={}, failed={}, llm_calls={})", - self.inner.total_time_ms(), - self.inner.summaries_generated, - self.inner.summaries_failed, - self.inner.llm_calls, - ) - } -} - -// ============================================================ -// Runtime Metrics Reports -// ============================================================ - -/// LLM usage metrics report. -#[pyclass(name = "LlmMetricsReport")] -pub struct PyLlmMetricsReport { - inner: LlmMetricsReport, -} - -#[pymethods] -impl PyLlmMetricsReport { - /// Total number of LLM calls. - #[getter] - fn total_calls(&self) -> u64 { - self.inner.total_calls - } - - /// Number of successful calls. - #[getter] - fn successful_calls(&self) -> u64 { - self.inner.successful_calls - } - - /// Number of failed calls. - #[getter] - fn failed_calls(&self) -> u64 { - self.inner.failed_calls - } - - /// Success rate (0.0 - 1.0). - #[getter] - fn success_rate(&self) -> f64 { - self.inner.success_rate - } - - /// Total input tokens. - #[getter] - fn total_input_tokens(&self) -> u64 { - self.inner.total_input_tokens - } - - /// Total output tokens. 
- #[getter] - fn total_output_tokens(&self) -> u64 { - self.inner.total_output_tokens - } - - /// Total tokens (input + output). - #[getter] - fn total_tokens(&self) -> u64 { - self.inner.total_tokens - } - - /// Average latency per call in milliseconds. - #[getter] - fn avg_latency_ms(&self) -> f64 { - self.inner.avg_latency_ms - } - - /// Total latency in milliseconds. - #[getter] - fn total_latency_ms(&self) -> u64 { - self.inner.total_latency_ms - } - - /// Estimated cost in USD. - #[getter] - fn estimated_cost_usd(&self) -> f64 { - self.inner.estimated_cost_usd - } - - /// Number of rate limit errors. - #[getter] - fn rate_limit_errors(&self) -> u64 { - self.inner.rate_limit_errors - } - - /// Number of timeout errors. - #[getter] - fn timeout_errors(&self) -> u64 { - self.inner.timeout_errors - } - - /// Number of fallback triggers. - #[getter] - fn fallback_triggers(&self) -> u64 { - self.inner.fallback_triggers - } - - fn __repr__(&self) -> String { - format!( - "LlmMetricsReport(calls={}, tokens={}, cost=${:.4})", - self.inner.total_calls, self.inner.total_tokens, self.inner.estimated_cost_usd, - ) - } -} - -/// Pilot decision metrics report. -#[pyclass(name = "PilotMetricsReport")] -pub struct PyPilotMetricsReport { - inner: PilotMetricsReport, -} - -#[pymethods] -impl PyPilotMetricsReport { - /// Total number of Pilot decisions. - #[getter] - fn total_decisions(&self) -> u64 { - self.inner.total_decisions - } - - /// Number of start guidance calls. - #[getter] - fn start_guidance_calls(&self) -> u64 { - self.inner.start_guidance_calls - } - - /// Number of fork decisions. - #[getter] - fn fork_decisions(&self) -> u64 { - self.inner.fork_decisions - } - - /// Number of backtrack calls. - #[getter] - fn backtrack_calls(&self) -> u64 { - self.inner.backtrack_calls - } - - /// Number of evaluate calls. - #[getter] - fn evaluate_calls(&self) -> u64 { - self.inner.evaluate_calls - } - - /// Decision accuracy based on feedback (0.0 - 1.0). 
- #[getter] - fn accuracy(&self) -> f64 { - self.inner.accuracy - } - - /// Number of correct decisions. - #[getter] - fn correct_decisions(&self) -> u64 { - self.inner.correct_decisions - } - - /// Number of incorrect decisions. - #[getter] - fn incorrect_decisions(&self) -> u64 { - self.inner.incorrect_decisions - } - - /// Average confidence across all decisions. - #[getter] - fn avg_confidence(&self) -> f64 { - self.inner.avg_confidence - } - - /// Number of LLM calls made by Pilot. - #[getter] - fn llm_calls(&self) -> u64 { - self.inner.llm_calls - } - - /// Number of interventions. - #[getter] - fn interventions(&self) -> u64 { - self.inner.interventions - } - - /// Number of skipped interventions. - #[getter] - fn skipped_interventions(&self) -> u64 { - self.inner.skipped_interventions - } - - /// Number of budget exhausted events. - #[getter] - fn budget_exhausted(&self) -> u64 { - self.inner.budget_exhausted - } - - /// Number of algorithm fallbacks. - #[getter] - fn algorithm_fallbacks(&self) -> u64 { - self.inner.algorithm_fallbacks - } - - fn __repr__(&self) -> String { - format!( - "PilotMetricsReport(decisions={}, accuracy={:.2}, avg_confidence={:.2})", - self.inner.total_decisions, self.inner.accuracy, self.inner.avg_confidence, - ) - } -} - -/// Retrieval operation metrics report. -#[pyclass(name = "RetrievalMetricsReport")] -pub struct PyRetrievalMetricsReport { - inner: RetrievalMetricsReport, -} - -#[pymethods] -impl PyRetrievalMetricsReport { - /// Total number of queries. - #[getter] - fn total_queries(&self) -> u64 { - self.inner.total_queries - } - - /// Total number of search iterations. - #[getter] - fn total_iterations(&self) -> u64 { - self.inner.total_iterations - } - - /// Average iterations per query. - #[getter] - fn avg_iterations(&self) -> f64 { - self.inner.avg_iterations - } - - /// Total nodes visited. - #[getter] - fn nodes_visited(&self) -> u64 { - self.inner.nodes_visited - } - - /// Total paths found. 
- #[getter] - fn paths_found(&self) -> u64 { - self.inner.paths_found - } - - /// Average path length. - #[getter] - fn avg_path_length(&self) -> f64 { - self.inner.avg_path_length - } - - /// Average path score (0.0 - 1.0). - #[getter] - fn avg_path_score(&self) -> f64 { - self.inner.avg_path_score - } - - /// Number of high-score paths (>= 0.5). - #[getter] - fn high_score_paths(&self) -> u64 { - self.inner.high_score_paths - } - - /// Number of low-score paths (< 0.3). - #[getter] - fn low_score_paths(&self) -> u64 { - self.inner.low_score_paths - } - - /// Number of cache hits. - #[getter] - fn cache_hits(&self) -> u64 { - self.inner.cache_hits - } - - /// Number of cache misses. - #[getter] - fn cache_misses(&self) -> u64 { - self.inner.cache_misses - } - - /// Cache hit rate (0.0 - 1.0). - #[getter] - fn cache_hit_rate(&self) -> f64 { - self.inner.cache_hit_rate - } - - /// Total latency in milliseconds. - #[getter] - fn total_latency_ms(&self) -> u64 { - self.inner.total_latency_ms - } - - /// Average latency per query in milliseconds. - #[getter] - fn avg_latency_ms(&self) -> f64 { - self.inner.avg_latency_ms - } - - /// Number of backtracks. - #[getter] - fn backtracks(&self) -> u64 { - self.inner.backtracks - } - - /// Number of sufficiency checks. - #[getter] - fn sufficiency_checks(&self) -> u64 { - self.inner.sufficiency_checks - } - - /// Sufficiency rate (0.0 - 1.0). - #[getter] - fn sufficiency_rate(&self) -> f64 { - self.inner.sufficiency_rate - } - - fn __repr__(&self) -> String { - format!( - "RetrievalMetricsReport(queries={}, avg_score={:.2}, cache_hit={:.1}%)", - self.inner.total_queries, - self.inner.avg_path_score, - self.inner.cache_hit_rate * 100.0, - ) - } -} - -/// Complete metrics report combining all subsystem metrics. -#[pyclass(name = "MetricsReport")] -pub struct PyMetricsReport { - inner: MetricsReport, -} - -#[pymethods] -impl PyMetricsReport { - /// LLM metrics. 
- #[getter] - fn llm(&self) -> PyLlmMetricsReport { - PyLlmMetricsReport { - inner: self.inner.llm.clone(), - } - } - - /// Pilot metrics. - #[getter] - fn pilot(&self) -> PyPilotMetricsReport { - PyPilotMetricsReport { - inner: self.inner.pilot.clone(), - } - } - - /// Retrieval metrics. - #[getter] - fn retrieval(&self) -> PyRetrievalMetricsReport { - PyRetrievalMetricsReport { - inner: self.inner.retrieval.clone(), - } - } - - /// Total estimated cost in USD. - fn total_cost_usd(&self) -> f64 { - self.inner.total_cost_usd() - } - - /// Overall success rate (0.0 - 1.0). - fn overall_success_rate(&self) -> f64 { - self.inner.overall_success_rate() - } - - fn __repr__(&self) -> String { - format!( - "MetricsReport(llm_calls={}, cost=${:.4}, queries={})", - self.inner.llm.total_calls, - self.inner.total_cost_usd(), - self.inner.retrieval.total_queries, - ) - } -} - -// ============================================================ -// IndexItem / IndexResult -// ============================================================ - -/// A single indexed document item. -#[pyclass(name = "IndexItem")] -pub struct PyIndexItem { - inner: IndexItem, -} - -#[pymethods] -impl PyIndexItem { - #[getter] - fn doc_id(&self) -> &str { - &self.inner.doc_id - } - - #[getter] - fn name(&self) -> &str { - &self.inner.name - } - - #[getter] - fn format(&self) -> String { - format!("{:?}", self.inner.format).to_lowercase() - } - - #[getter] - fn description(&self) -> Option<&str> { - self.inner.description.as_deref() - } - - #[getter] - fn source_path(&self) -> Option<&str> { - self.inner.source_path.as_deref() - } - - #[getter] - fn page_count(&self) -> Option { - self.inner.page_count - } - - /// Indexing pipeline metrics (timing, LLM usage, etc.). 
- #[getter] - fn metrics(&self) -> Option { - self.inner - .metrics - .as_ref() - .map(|m| PyIndexMetrics { inner: m.clone() }) - } - - fn __repr__(&self) -> String { - format!( - "IndexItem(doc_id='{}', name='{}')", - self.inner.doc_id, self.inner.name - ) - } -} - -/// Result of a document indexing operation. -#[pyclass(name = "IndexResult")] -pub struct PyIndexResult { - inner: IndexResult, -} - -#[pymethods] -impl PyIndexResult { - /// The document ID (convenience for single-document indexing). - #[getter] - fn doc_id(&self) -> Option { - self.inner.doc_id().map(|s| s.to_string()) - } - - /// All indexed items. - #[getter] - fn items(&self) -> Vec { - self.inner - .items - .iter() - .map(|i| PyIndexItem { inner: i.clone() }) - .collect() - } - - /// Failed items. - #[getter] - fn failed(&self) -> Vec { - self.inner - .failed - .iter() - .map(|f| PyFailedItem { inner: f.clone() }) - .collect() - } - - /// Whether any items failed. - fn has_failures(&self) -> bool { - self.inner.has_failures() - } - - /// Total number of items (successful + failed). - fn total(&self) -> usize { - self.inner.total() - } - - fn __len__(&self) -> usize { - self.inner.len() - } - - fn __repr__(&self) -> String { - format!( - "IndexResult(doc_id={:?}, count={}, failed={})", - self.inner.doc_id(), - self.inner.items.len(), - self.inner.failed.len() - ) - } -} - -// ============================================================ -// DocumentInfo -// ============================================================ - -/// Information about an indexed document. 
-#[pyclass(name = "DocumentInfo")] -pub struct PyDocumentInfo { - inner: DocumentInfo, -} - -#[pymethods] -impl PyDocumentInfo { - #[getter] - fn id(&self) -> &str { - &self.inner.id - } - - #[getter] - fn name(&self) -> &str { - &self.inner.name - } - - #[getter] - fn format(&self) -> &str { - &self.inner.format - } - - #[getter] - fn description(&self) -> Option<&str> { - self.inner.description.as_deref() - } - - #[getter] - fn source_path(&self) -> Option<&str> { - self.inner.source_path.as_deref() - } - - #[getter] - fn page_count(&self) -> Option { - self.inner.page_count - } - - #[getter] - fn line_count(&self) -> Option { - self.inner.line_count - } - - fn __repr__(&self) -> String { - format!( - "DocumentInfo(id='{}', name='{}', format='{}')", - self.inner.id, self.inner.name, self.inner.format - ) - } -} - -// ============================================================ -// DocumentGraph types -// ============================================================ - -use ::vectorless::graph::{ - DocumentGraph, DocumentGraphNode, EdgeEvidence, GraphEdge, WeightedKeyword, -}; - -/// A keyword with weight from document analysis. -#[pyclass(name = "WeightedKeyword")] -pub struct PyWeightedKeyword { - inner: WeightedKeyword, -} - -#[pymethods] -impl PyWeightedKeyword { - #[getter] - fn keyword(&self) -> &str { - &self.inner.keyword - } - - #[getter] - fn weight(&self) -> f32 { - self.inner.weight - } - - fn __repr__(&self) -> String { - format!( - "WeightedKeyword('{}', weight={:.2})", - self.inner.keyword, self.inner.weight - ) - } -} - -/// Evidence for a cross-document connection. -#[pyclass(name = "EdgeEvidence")] -pub struct PyEdgeEvidence { - inner: EdgeEvidence, -} - -#[pymethods] -impl PyEdgeEvidence { - /// Number of shared keywords. - #[getter] - fn shared_keyword_count(&self) -> usize { - self.inner.shared_keyword_count - } - - /// Jaccard similarity of keyword sets. 
- #[getter] - fn keyword_jaccard(&self) -> f32 { - self.inner.keyword_jaccard - } - - /// Shared keywords with weights. - #[getter] - fn shared_keywords(&self) -> Vec<(String, f32, f32)> { - self.inner - .shared_keywords - .iter() - .map(|sk| (sk.keyword.clone(), sk.source_weight, sk.target_weight)) - .collect() - } - - fn __repr__(&self) -> String { - format!( - "EdgeEvidence(shared={}, jaccard={:.2})", - self.inner.shared_keyword_count, self.inner.keyword_jaccard - ) - } -} - -/// An edge representing a relationship between two documents. -#[pyclass(name = "GraphEdge")] -pub struct PyGraphEdge { - inner: GraphEdge, -} - -#[pymethods] -impl PyGraphEdge { - /// Target document ID. - #[getter] - fn target_doc_id(&self) -> &str { - &self.inner.target_doc_id - } - - /// Edge weight (connection strength). - #[getter] - fn weight(&self) -> f32 { - self.inner.weight - } - - /// Evidence for this connection. - #[getter] - fn evidence(&self) -> PyEdgeEvidence { - PyEdgeEvidence { - inner: self.inner.evidence.clone(), - } - } - - fn __repr__(&self) -> String { - format!( - "GraphEdge(target='{}', weight={:.2})", - self.inner.target_doc_id, self.inner.weight - ) - } -} - -/// A node in the document graph representing an indexed document. -#[pyclass(name = "DocumentGraphNode")] -pub struct PyDocumentGraphNode { - inner: DocumentGraphNode, -} - -#[pymethods] -impl PyDocumentGraphNode { - #[getter] - fn doc_id(&self) -> &str { - &self.inner.doc_id - } - - #[getter] - fn title(&self) -> &str { - &self.inner.title - } - - #[getter] - fn format(&self) -> &str { - &self.inner.format - } - - #[getter] - fn node_count(&self) -> usize { - self.inner.node_count - } - - /// Top keywords extracted from the document. 
- #[getter] - fn top_keywords(&self) -> Vec { - self.inner - .top_keywords - .iter() - .map(|kw| PyWeightedKeyword { inner: kw.clone() }) - .collect() - } - - fn __repr__(&self) -> String { - format!( - "DocumentGraphNode(doc_id='{}', title='{}')", - self.inner.doc_id, self.inner.title - ) - } -} - -/// Cross-document relationship graph. -/// -/// Automatically rebuilt after indexing. Connects documents -/// that share keywords via Jaccard similarity. -#[pyclass(name = "DocumentGraph")] -pub struct PyDocumentGraph { - inner: DocumentGraph, -} - -#[pymethods] -impl PyDocumentGraph { - /// Number of document nodes. - fn node_count(&self) -> usize { - self.inner.node_count() - } - - /// Number of relationship edges. - fn edge_count(&self) -> usize { - self.inner.edge_count() - } - - /// Get a document node by ID. - fn get_node(&self, doc_id: String) -> Option { - self.inner - .get_node(&doc_id) - .map(|n| PyDocumentGraphNode { inner: n.clone() }) - } - - /// Get all document IDs in the graph. - fn doc_ids(&self) -> Vec { - self.inner.doc_ids().map(|s| s.to_string()).collect() - } - - /// Get edges (neighbors) for a document. - fn get_neighbors(&self, doc_id: String) -> Vec { - self.inner - .get_neighbors(&doc_id) - .iter() - .map(|e| PyGraphEdge { inner: e.clone() }) - .collect() - } - - /// Whether the graph is empty. 
- fn is_empty(&self) -> bool { - self.inner.is_empty() - } - - fn __repr__(&self) -> String { - format!( - "DocumentGraph(nodes={}, edges={})", - self.inner.node_count(), - self.inner.edge_count() - ) - } -} - -// ============================================================ -// Engine async helpers (named functions to avoid FnOnce HRTB issue) -// ============================================================ - -async fn run_index(engine: Arc, ctx: IndexContext) -> PyResult { - let result = engine.index(ctx).await.map_err(to_py_err)?; - Ok(PyIndexResult { inner: result }) -} - -async fn run_query(engine: Arc, ctx: QueryContext) -> PyResult { - let result = engine.query(ctx).await.map_err(to_py_err)?; - Ok(PyQueryResult { inner: result }) -} - -async fn run_list(engine: Arc) -> PyResult> { - let docs = engine.list().await.map_err(to_py_err)?; - Ok(docs - .into_iter() - .map(|d| PyDocumentInfo { inner: d }) - .collect()) -} - -async fn run_remove(engine: Arc, doc_id: String) -> PyResult { - engine.remove(&doc_id).await.map_err(to_py_err) -} - -async fn run_clear(engine: Arc) -> PyResult { - engine.clear().await.map_err(to_py_err) -} - -async fn run_exists(engine: Arc, doc_id: String) -> PyResult { - engine.exists(&doc_id).await.map_err(to_py_err) -} - -async fn run_get_graph(engine: Arc) -> PyResult> { - let graph = engine.get_graph().await.map_err(to_py_err)?; - Ok(graph.map(|g| PyDocumentGraph { inner: g })) -} - -fn run_metrics_report(engine: Arc) -> PyMetricsReport { - PyMetricsReport { - inner: engine.metrics_report(), - } -} - -// ============================================================ -// Engine -// ============================================================ - -/// The main vectorless engine. -/// -/// `api_key` and `model` are **required**. 
-/// -/// ```python -/// from vectorless import Engine, IndexContext, QueryContext -/// -/// engine = Engine( -/// api_key="sk-...", -/// model="gpt-4o", -/// ) -/// -/// # Index -/// result = await engine.index(IndexContext.from_path("./report.pdf")) -/// doc_id = result.doc_id -/// -/// # Query -/// answer = await engine.query(QueryContext("What is the revenue?").with_doc_ids([doc_id])) -/// print(answer.single().content) -/// ``` -#[pyclass(name = "Engine")] -pub struct PyEngine { - inner: Arc, -} - -#[pymethods] -impl PyEngine { - /// Create a new Engine. - /// - /// Args: - /// config_path: Path to configuration file (optional). - /// api_key: **Required**. LLM API key. - /// model: **Required**. LLM model name. - /// endpoint: Optional API endpoint. - /// - /// Raises: - /// VectorlessError: If engine creation fails. - #[new] - #[pyo3(signature = (config_path=None, api_key=None, model=None, endpoint=None))] - fn new( - config_path: Option, - api_key: Option, - model: Option, - endpoint: Option, - ) -> PyResult { - let rt = Runtime::new().map_err(|e| { - PyErr::from(VectorlessError::new( - format!("Failed to create tokio runtime: {}", e), - "config", - )) - })?; - - let engine = rt.block_on(async { - let mut builder = EngineBuilder::new(); - - if let Some(path) = &config_path { - builder = builder.with_config_path(path); - } - if let Some(m) = &model { - builder = builder.with_model(m); - } - if let Some(e) = &endpoint { - builder = builder.with_endpoint(e); - } - if let Some(key) = api_key { - builder = builder.with_key(key); - } - - builder.build().await - }); - - let engine = engine.map_err(|e| { - PyErr::from(VectorlessError::new( - format!("Failed to create engine: {}", e), - "config", - )) - })?; - - Ok(Self { - inner: Arc::new(engine), - }) - } - - /// Index a document. - /// - /// Args: - /// ctx: IndexContext created from from_path, from_paths, from_dir, etc. - /// - /// Returns: - /// IndexResult with doc_id and items. 
- /// - /// Raises: - /// VectorlessError: If indexing fails. - fn index<'py>(&self, py: Python<'py>, ctx: &PyIndexContext) -> PyResult> { - let engine = Arc::clone(&self.inner); - let index_ctx = ctx.inner.clone(); - future_into_py(py, run_index(engine, index_ctx)) - } - - /// Query indexed documents. - /// - /// Args: - /// ctx: QueryContext with query text and scope. - /// - /// Returns: - /// QueryResult with answer and score. - /// - /// Raises: - /// VectorlessError: If query fails. - fn query<'py>(&self, py: Python<'py>, ctx: &PyQueryContext) -> PyResult> { - let engine = Arc::clone(&self.inner); - let query_ctx = ctx.inner.clone(); - future_into_py(py, run_query(engine, query_ctx)) - } - - /// List all indexed documents. - /// - /// Returns: - /// List of DocumentInfo objects. - fn list<'py>(&self, py: Python<'py>) -> PyResult> { - let engine = Arc::clone(&self.inner); - future_into_py(py, run_list(engine)) - } - - /// Remove a document by ID. - /// - /// Returns: - /// True if removed, False if not found. - fn remove<'py>(&self, py: Python<'py>, doc_id: String) -> PyResult> { - let engine = Arc::clone(&self.inner); - future_into_py(py, run_remove(engine, doc_id)) - } - - /// Remove all indexed documents. - /// - /// Returns: - /// Number of documents removed. - fn clear<'py>(&self, py: Python<'py>) -> PyResult> { - let engine = Arc::clone(&self.inner); - future_into_py(py, run_clear(engine)) - } - - /// Check if a document exists. - fn exists<'py>(&self, py: Python<'py>, doc_id: String) -> PyResult> { - let engine = Arc::clone(&self.inner); - future_into_py(py, run_exists(engine, doc_id)) - } - - /// Get the cross-document relationship graph. - /// - /// Returns: - /// DocumentGraph if any documents exist, else None. - fn get_graph<'py>(&self, py: Python<'py>) -> PyResult> { - let engine = Arc::clone(&self.inner); - future_into_py(py, run_get_graph(engine)) - } - - /// Generate a complete metrics report. 
- /// - /// Returns: - /// MetricsReport with LLM, Pilot, and Retrieval metrics. - fn metrics_report(&self) -> PyMetricsReport { - run_metrics_report(Arc::clone(&self.inner)) - } - - fn __repr__(&self) -> String { - "Engine(...)".to_string() - } -} - -// ============================================================ -// Module Definition -// ============================================================ +mod config; +mod context; +mod document; +mod engine; +mod error; +mod graph; +mod metrics; +mod results; + +use config::PyConfig; +use context::{PyIndexContext, PyIndexOptions, PyQueryContext}; +use document::PyDocumentInfo; +use engine::PyEngine; +use error::VectorlessError; +use graph::{PyDocumentGraph, PyDocumentGraphNode, PyEdgeEvidence, PyGraphEdge, PyWeightedKeyword}; +use metrics::{PyLlmMetricsReport, PyMetricsReport, PyPilotMetricsReport, PyRetrievalMetricsReport}; +use results::{PyFailedItem, PyIndexItem, PyIndexMetrics, PyIndexResult, PyQueryResult, PyQueryResultItem}; /// Vectorless - Reasoning-native document intelligence engine. /// @@ -1595,6 +55,7 @@ fn _vectorless(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add("__version__", env!("CARGO_PKG_VERSION"))?; diff --git a/python/src/metrics.rs b/python/src/metrics.rs new file mode 100644 index 00000000..669511cb --- /dev/null +++ b/python/src/metrics.rs @@ -0,0 +1,376 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Metrics report Python wrappers. + +use pyo3::prelude::*; + +use ::vectorless::metrics::{ + LlmMetricsReport, MetricsReport, PilotMetricsReport, RetrievalMetricsReport, +}; + +/// LLM usage metrics report. +#[pyclass(name = "LlmMetricsReport")] +pub struct PyLlmMetricsReport { + pub(crate) inner: LlmMetricsReport, +} + +#[pymethods] +impl PyLlmMetricsReport { + /// Total number of LLM calls. 
+ #[getter] + fn total_calls(&self) -> u64 { + self.inner.total_calls + } + + /// Number of successful calls. + #[getter] + fn successful_calls(&self) -> u64 { + self.inner.successful_calls + } + + /// Number of failed calls. + #[getter] + fn failed_calls(&self) -> u64 { + self.inner.failed_calls + } + + /// Success rate (0.0 - 1.0). + #[getter] + fn success_rate(&self) -> f64 { + self.inner.success_rate + } + + /// Total input tokens. + #[getter] + fn total_input_tokens(&self) -> u64 { + self.inner.total_input_tokens + } + + /// Total output tokens. + #[getter] + fn total_output_tokens(&self) -> u64 { + self.inner.total_output_tokens + } + + /// Total tokens (input + output). + #[getter] + fn total_tokens(&self) -> u64 { + self.inner.total_tokens + } + + /// Average latency per call in milliseconds. + #[getter] + fn avg_latency_ms(&self) -> f64 { + self.inner.avg_latency_ms + } + + /// Total latency in milliseconds. + #[getter] + fn total_latency_ms(&self) -> u64 { + self.inner.total_latency_ms + } + + /// Estimated cost in USD. + #[getter] + fn estimated_cost_usd(&self) -> f64 { + self.inner.estimated_cost_usd + } + + /// Number of rate limit errors. + #[getter] + fn rate_limit_errors(&self) -> u64 { + self.inner.rate_limit_errors + } + + /// Number of timeout errors. + #[getter] + fn timeout_errors(&self) -> u64 { + self.inner.timeout_errors + } + + /// Number of fallback triggers. + #[getter] + fn fallback_triggers(&self) -> u64 { + self.inner.fallback_triggers + } + + fn __repr__(&self) -> String { + format!( + "LlmMetricsReport(calls={}, tokens={}, cost=${:.4})", + self.inner.total_calls, self.inner.total_tokens, self.inner.estimated_cost_usd, + ) + } +} + +/// Pilot decision metrics report. +#[pyclass(name = "PilotMetricsReport")] +pub struct PyPilotMetricsReport { + pub(crate) inner: PilotMetricsReport, +} + +#[pymethods] +impl PyPilotMetricsReport { + /// Total number of Pilot decisions. 
+ #[getter] + fn total_decisions(&self) -> u64 { + self.inner.total_decisions + } + + /// Number of start guidance calls. + #[getter] + fn start_guidance_calls(&self) -> u64 { + self.inner.start_guidance_calls + } + + /// Number of fork decisions. + #[getter] + fn fork_decisions(&self) -> u64 { + self.inner.fork_decisions + } + + /// Number of backtrack calls. + #[getter] + fn backtrack_calls(&self) -> u64 { + self.inner.backtrack_calls + } + + /// Number of evaluate calls. + #[getter] + fn evaluate_calls(&self) -> u64 { + self.inner.evaluate_calls + } + + /// Decision accuracy based on feedback (0.0 - 1.0). + #[getter] + fn accuracy(&self) -> f64 { + self.inner.accuracy + } + + /// Number of correct decisions. + #[getter] + fn correct_decisions(&self) -> u64 { + self.inner.correct_decisions + } + + /// Number of incorrect decisions. + #[getter] + fn incorrect_decisions(&self) -> u64 { + self.inner.incorrect_decisions + } + + /// Average confidence across all decisions. + #[getter] + fn avg_confidence(&self) -> f64 { + self.inner.avg_confidence + } + + /// Number of LLM calls made by Pilot. + #[getter] + fn llm_calls(&self) -> u64 { + self.inner.llm_calls + } + + /// Number of interventions. + #[getter] + fn interventions(&self) -> u64 { + self.inner.interventions + } + + /// Number of skipped interventions. + #[getter] + fn skipped_interventions(&self) -> u64 { + self.inner.skipped_interventions + } + + /// Number of budget exhausted events. + #[getter] + fn budget_exhausted(&self) -> u64 { + self.inner.budget_exhausted + } + + /// Number of algorithm fallbacks. + #[getter] + fn algorithm_fallbacks(&self) -> u64 { + self.inner.algorithm_fallbacks + } + + fn __repr__(&self) -> String { + format!( + "PilotMetricsReport(decisions={}, accuracy={:.2}, avg_confidence={:.2})", + self.inner.total_decisions, self.inner.accuracy, self.inner.avg_confidence, + ) + } +} + +/// Retrieval operation metrics report. 
+#[pyclass(name = "RetrievalMetricsReport")] +pub struct PyRetrievalMetricsReport { + pub(crate) inner: RetrievalMetricsReport, +} + +#[pymethods] +impl PyRetrievalMetricsReport { + /// Total number of queries. + #[getter] + fn total_queries(&self) -> u64 { + self.inner.total_queries + } + + /// Total number of search iterations. + #[getter] + fn total_iterations(&self) -> u64 { + self.inner.total_iterations + } + + /// Average iterations per query. + #[getter] + fn avg_iterations(&self) -> f64 { + self.inner.avg_iterations + } + + /// Total nodes visited. + #[getter] + fn nodes_visited(&self) -> u64 { + self.inner.nodes_visited + } + + /// Total paths found. + #[getter] + fn paths_found(&self) -> u64 { + self.inner.paths_found + } + + /// Average path length. + #[getter] + fn avg_path_length(&self) -> f64 { + self.inner.avg_path_length + } + + /// Average path score (0.0 - 1.0). + #[getter] + fn avg_path_score(&self) -> f64 { + self.inner.avg_path_score + } + + /// Number of high-score paths (>= 0.5). + #[getter] + fn high_score_paths(&self) -> u64 { + self.inner.high_score_paths + } + + /// Number of low-score paths (< 0.3). + #[getter] + fn low_score_paths(&self) -> u64 { + self.inner.low_score_paths + } + + /// Number of cache hits. + #[getter] + fn cache_hits(&self) -> u64 { + self.inner.cache_hits + } + + /// Number of cache misses. + #[getter] + fn cache_misses(&self) -> u64 { + self.inner.cache_misses + } + + /// Cache hit rate (0.0 - 1.0). + #[getter] + fn cache_hit_rate(&self) -> f64 { + self.inner.cache_hit_rate + } + + /// Total latency in milliseconds. + #[getter] + fn total_latency_ms(&self) -> u64 { + self.inner.total_latency_ms + } + + /// Average latency per query in milliseconds. + #[getter] + fn avg_latency_ms(&self) -> f64 { + self.inner.avg_latency_ms + } + + /// Number of backtracks. + #[getter] + fn backtracks(&self) -> u64 { + self.inner.backtracks + } + + /// Number of sufficiency checks. 
+ #[getter] + fn sufficiency_checks(&self) -> u64 { + self.inner.sufficiency_checks + } + + /// Sufficiency rate (0.0 - 1.0). + #[getter] + fn sufficiency_rate(&self) -> f64 { + self.inner.sufficiency_rate + } + + fn __repr__(&self) -> String { + format!( + "RetrievalMetricsReport(queries={}, avg_score={:.2}, cache_hit={:.1}%)", + self.inner.total_queries, + self.inner.avg_path_score, + self.inner.cache_hit_rate * 100.0, + ) + } +} + +/// Complete metrics report combining all subsystem metrics. +#[pyclass(name = "MetricsReport")] +pub struct PyMetricsReport { + pub(crate) inner: MetricsReport, +} + +#[pymethods] +impl PyMetricsReport { + /// LLM metrics. + #[getter] + fn llm(&self) -> PyLlmMetricsReport { + PyLlmMetricsReport { + inner: self.inner.llm.clone(), + } + } + + /// Pilot metrics. + #[getter] + fn pilot(&self) -> PyPilotMetricsReport { + PyPilotMetricsReport { + inner: self.inner.pilot.clone(), + } + } + + /// Retrieval metrics. + #[getter] + fn retrieval(&self) -> PyRetrievalMetricsReport { + PyRetrievalMetricsReport { + inner: self.inner.retrieval.clone(), + } + } + + /// Total estimated cost in USD. + fn total_cost_usd(&self) -> f64 { + self.inner.total_cost_usd() + } + + /// Overall success rate (0.0 - 1.0). + fn overall_success_rate(&self) -> f64 { + self.inner.overall_success_rate() + } + + fn __repr__(&self) -> String { + format!( + "MetricsReport(llm_calls={}, cost=${:.4}, queries={})", + self.inner.llm.total_calls, + self.inner.total_cost_usd(), + self.inner.retrieval.total_queries, + ) + } +} diff --git a/python/src/results.rs b/python/src/results.rs new file mode 100644 index 00000000..fe780a4c --- /dev/null +++ b/python/src/results.rs @@ -0,0 +1,351 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Query and index result Python wrappers. 
+ +use pyo3::prelude::*; + +use ::vectorless::client::{FailedItem, IndexItem, IndexResult, QueryResult, QueryResultItem}; +use ::vectorless::metrics::IndexMetrics; + +// ============================================================ +// QueryResultItem +// ============================================================ + +/// A single document's query result. +#[pyclass(name = "QueryResultItem")] +pub struct PyQueryResultItem { + pub(crate) inner: QueryResultItem, +} + +#[pymethods] +impl PyQueryResultItem { + /// The document ID. + #[getter] + fn doc_id(&self) -> &str { + &self.inner.doc_id + } + + /// The retrieved content. + #[getter] + fn content(&self) -> &str { + &self.inner.content + } + + /// Relevance score (0.0 to 1.0). + #[getter] + fn score(&self) -> f32 { + self.inner.score + } + + /// Node IDs that matched. + #[getter] + fn node_ids(&self) -> Vec { + self.inner.node_ids.clone() + } + + fn __repr__(&self) -> String { + format!( + "QueryResultItem(doc_id='{}', score={:.2}, content_len={})", + self.inner.doc_id, + self.inner.score, + self.inner.content.len() + ) + } +} + +// ============================================================ +// FailedItem +// ============================================================ + +/// A failed item in a batch operation. +#[pyclass(name = "FailedItem")] +pub struct PyFailedItem { + pub(crate) inner: FailedItem, +} + +#[pymethods] +impl PyFailedItem { + /// Source description. + #[getter] + fn source(&self) -> &str { + &self.inner.source + } + + /// Error message. + #[getter] + fn error(&self) -> &str { + &self.inner.error + } + + fn __repr__(&self) -> String { + format!( + "FailedItem(source='{}', error='{}')", + self.inner.source, self.inner.error + ) + } +} + +// ============================================================ +// QueryResult +// ============================================================ + +/// Result of a document query. 
+#[pyclass(name = "QueryResult")] +pub struct PyQueryResult { + pub(crate) inner: QueryResult, +} + +#[pymethods] +impl PyQueryResult { + /// Result items (one per document). + #[getter] + fn items(&self) -> Vec { + self.inner + .items + .iter() + .map(|i| PyQueryResultItem { inner: i.clone() }) + .collect() + } + + /// Get the first (single-doc) result item. + fn single(&self) -> Option { + self.inner + .single() + .map(|i| PyQueryResultItem { inner: i.clone() }) + } + + /// Number of result items. + fn __len__(&self) -> usize { + self.inner.len() + } + + /// Whether any documents failed. + fn has_failures(&self) -> bool { + self.inner.has_failures() + } + + /// Failed items. + #[getter] + fn failed(&self) -> Vec { + self.inner + .failed + .iter() + .map(|f| PyFailedItem { inner: f.clone() }) + .collect() + } + + fn __repr__(&self) -> String { + format!( + "QueryResult(items={}, failed={})", + self.inner.len(), + self.inner.failed.len() + ) + } +} + +// ============================================================ +// IndexMetrics +// ============================================================ + +/// Indexing pipeline metrics. +#[pyclass(name = "IndexMetrics")] +pub struct PyIndexMetrics { + pub(crate) inner: IndexMetrics, +} + +#[pymethods] +impl PyIndexMetrics { + /// Total indexing time (ms). + #[getter] + fn total_time_ms(&self) -> u64 { + self.inner.total_time_ms() + } + + /// Parse stage duration (ms). + #[getter] + fn parse_time_ms(&self) -> u64 { + self.inner.parse_time_ms + } + + /// Build stage duration (ms). + #[getter] + fn build_time_ms(&self) -> u64 { + self.inner.build_time_ms + } + + /// Enhance (summary) stage duration (ms). + #[getter] + fn enhance_time_ms(&self) -> u64 { + self.inner.enhance_time_ms + } + + /// Number of nodes processed. + #[getter] + fn nodes_processed(&self) -> usize { + self.inner.nodes_processed + } + + /// Number of summaries successfully generated. 
+ #[getter] + fn summaries_generated(&self) -> usize { + self.inner.summaries_generated + } + + /// Number of summaries that failed to generate. + #[getter] + fn summaries_failed(&self) -> usize { + self.inner.summaries_failed + } + + /// Number of LLM calls made. + #[getter] + fn llm_calls(&self) -> usize { + self.inner.llm_calls + } + + /// Total tokens generated by LLM. + #[getter] + fn total_tokens_generated(&self) -> usize { + self.inner.total_tokens_generated + } + + /// Number of topics in reasoning index. + #[getter] + fn topics_indexed(&self) -> usize { + self.inner.topics_indexed + } + + /// Number of keywords in reasoning index. + #[getter] + fn keywords_indexed(&self) -> usize { + self.inner.keywords_indexed + } + + fn __repr__(&self) -> String { + format!( + "IndexMetrics(total={}ms, summaries={}, failed={}, llm_calls={})", + self.inner.total_time_ms(), + self.inner.summaries_generated, + self.inner.summaries_failed, + self.inner.llm_calls, + ) + } +} + +// ============================================================ +// IndexItem / IndexResult +// ============================================================ + +/// A single indexed document item. +#[pyclass(name = "IndexItem")] +pub struct PyIndexItem { + pub(crate) inner: IndexItem, +} + +#[pymethods] +impl PyIndexItem { + #[getter] + fn doc_id(&self) -> &str { + &self.inner.doc_id + } + + #[getter] + fn name(&self) -> &str { + &self.inner.name + } + + #[getter] + fn format(&self) -> String { + format!("{:?}", self.inner.format).to_lowercase() + } + + #[getter] + fn description(&self) -> Option<&str> { + self.inner.description.as_deref() + } + + #[getter] + fn source_path(&self) -> Option<&str> { + self.inner.source_path.as_deref() + } + + #[getter] + fn page_count(&self) -> Option { + self.inner.page_count + } + + /// Indexing pipeline metrics (timing, LLM usage, etc.). 
+ #[getter] + fn metrics(&self) -> Option { + self.inner + .metrics + .as_ref() + .map(|m| PyIndexMetrics { inner: m.clone() }) + } + + fn __repr__(&self) -> String { + format!( + "IndexItem(doc_id='{}', name='{}')", + self.inner.doc_id, self.inner.name + ) + } +} + +/// Result of a document indexing operation. +#[pyclass(name = "IndexResult")] +pub struct PyIndexResult { + pub(crate) inner: IndexResult, +} + +#[pymethods] +impl PyIndexResult { + /// The document ID (convenience for single-document indexing). + #[getter] + fn doc_id(&self) -> Option { + self.inner.doc_id().map(|s| s.to_string()) + } + + /// All indexed items. + #[getter] + fn items(&self) -> Vec { + self.inner + .items + .iter() + .map(|i| PyIndexItem { inner: i.clone() }) + .collect() + } + + /// Failed items. + #[getter] + fn failed(&self) -> Vec { + self.inner + .failed + .iter() + .map(|f| PyFailedItem { inner: f.clone() }) + .collect() + } + + /// Whether any items failed. + fn has_failures(&self) -> bool { + self.inner.has_failures() + } + + /// Total number of items (successful + failed). + fn total(&self) -> usize { + self.inner.total() + } + + fn __len__(&self) -> usize { + self.inner.len() + } + + fn __repr__(&self) -> String { + format!( + "IndexResult(doc_id={:?}, count={}, failed={})", + self.inner.doc_id(), + self.inner.items.len(), + self.inner.failed.len() + ) + } +} diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs index 8dee7c37..d3df9223 100644 --- a/rust/src/client/builder.rs +++ b/rust/src/client/builder.rs @@ -66,11 +66,11 @@ impl EngineBuilder { // Basic Configuration // ============================================================ - /// Set a custom configuration for advanced tuning of internal parameters. + /// Set a custom configuration. /// - /// When provided, this replaces the default [`Config`]. Builder methods - /// (`with_key`, `with_model`, `with_endpoint`) still override the - /// corresponding fields. 
+ /// When provided, this replaces the default [`Config`] entirely. + /// Builder methods (`with_key`, `with_model`, `with_endpoint`) + /// will still override the corresponding fields on top of this config. #[must_use] pub fn with_config(mut self, config: Config) -> Self { self.config = Some(config); diff --git a/rust/src/config/mod.rs b/rust/src/config/mod.rs index 0a347826..f6d26927 100644 --- a/rust/src/config/mod.rs +++ b/rust/src/config/mod.rs @@ -11,9 +11,9 @@ mod merge; mod types; mod validator; -pub(crate) use loader::ConfigLoader; +pub use types::Config; pub(crate) use types::{ - CacheConfig, CompressionAlgorithm, ConcurrencyConfig, Config, FallbackBehavior, FallbackConfig, + CacheConfig, CompressionAlgorithm, ConcurrencyConfig, FallbackBehavior, FallbackConfig, IndexerConfig, LlmClientConfig, LlmConfig, LlmMetricsConfig, LlmPoolConfig, MetricsConfig, OnAllFailedBehavior, PilotMetricsConfig, RetrievalConfig, RetrievalMetricsConfig, SufficiencyConfig, SummaryConfig, diff --git a/rust/src/lib.rs b/rust/src/lib.rs index a361b557..26dcceae 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -34,7 +34,7 @@ //! 
``` pub mod client; -mod config; +pub mod config; pub use config::Config; pub mod document; pub mod error; From 1d9cfec1b7b5ff40ecbbf1bc617083cf2fe7a2f9 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 12:46:34 +0800 Subject: [PATCH 09/21] feat(client): add endpoint validation in engine builder - Add validation to check if LLM endpoint is configured - Return MissingEndpoint error when endpoint is not provided - Update BuildError enum with MissingEndpoint variant - Include proper error message for missing endpoint configuration --- rust/src/client/builder.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs index d3df9223..b9ea2cd9 100644 --- a/rust/src/client/builder.rs +++ b/rust/src/client/builder.rs @@ -240,6 +240,9 @@ impl EngineBuilder { if retrieval_model.is_empty() { return Err(BuildError::MissingModel); } + if config.llm.endpoint.is_none() { + return Err(BuildError::MissingEndpoint); + } // Open workspace from config let workspace = Workspace::new(&config.storage.workspace_dir) @@ -302,6 +305,10 @@ pub enum BuildError { #[error("Missing model: call .with_model(\"gpt-4o\") or set model in config file")] MissingModel, + /// Missing endpoint URL. + #[error("Missing endpoint: call .with_endpoint(\"https://api.xxx.com/v1\") or set endpoint in config")] + MissingEndpoint, + /// Other error. 
#[error("{0}")] Other(String), From 47a5e3852a7d74d8bd7df94bf3d338f29ed18492 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 12:51:21 +0800 Subject: [PATCH 10/21] refactor(rust): clean up imports and formatting across multiple modules - Remove unnecessary line breaks in import statements in context.rs - Format long import statements with proper line breaks in lib.rs - Fix multiline string formatting in builder.rs error definition - Add missing endpoint example in engine.rs documentation - Reorganize import statements in engine.rs for better readability --- python/src/context.rs | 4 +--- python/src/lib.rs | 8 ++++++-- rust/src/client/builder.rs | 4 +++- rust/src/client/engine.rs | 42 ++++++++++++++++++++++---------------- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/python/src/context.rs b/python/src/context.rs index 4b005033..3eedc6f9 100644 --- a/python/src/context.rs +++ b/python/src/context.rs @@ -5,9 +5,7 @@ use pyo3::prelude::*; -use ::vectorless::client::{ - DocumentFormat, IndexContext, IndexMode, IndexOptions, QueryContext, -}; +use ::vectorless::client::{DocumentFormat, IndexContext, IndexMode, IndexOptions, QueryContext}; use super::error::VectorlessError; diff --git a/python/src/lib.rs b/python/src/lib.rs index c3e71c59..ebee59cf 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -20,8 +20,12 @@ use document::PyDocumentInfo; use engine::PyEngine; use error::VectorlessError; use graph::{PyDocumentGraph, PyDocumentGraphNode, PyEdgeEvidence, PyGraphEdge, PyWeightedKeyword}; -use metrics::{PyLlmMetricsReport, PyMetricsReport, PyPilotMetricsReport, PyRetrievalMetricsReport}; -use results::{PyFailedItem, PyIndexItem, PyIndexMetrics, PyIndexResult, PyQueryResult, PyQueryResultItem}; +use metrics::{ + PyLlmMetricsReport, PyMetricsReport, PyPilotMetricsReport, PyRetrievalMetricsReport, +}; +use results::{ + PyFailedItem, PyIndexItem, PyIndexMetrics, PyIndexResult, PyQueryResult, PyQueryResultItem, +}; /// 
Vectorless - Reasoning-native document intelligence engine. /// diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs index b9ea2cd9..b0c035cf 100644 --- a/rust/src/client/builder.rs +++ b/rust/src/client/builder.rs @@ -306,7 +306,9 @@ pub enum BuildError { MissingModel, /// Missing endpoint URL. - #[error("Missing endpoint: call .with_endpoint(\"https://api.xxx.com/v1\") or set endpoint in config")] + #[error( + "Missing endpoint: call .with_endpoint(\"https://api.xxx.com/v1\") or set endpoint in config" + )] MissingEndpoint, /// Other error. diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs index 5c909c36..6cc4e207 100644 --- a/rust/src/client/engine.rs +++ b/rust/src/client/engine.rs @@ -19,6 +19,7 @@ //! let engine = EngineBuilder::new() //! .with_key("sk-...") //! .with_model("gpt-4o") +//! .with_endpoint("https://api.openai.com/v1") //! .build() //! .await?; //! @@ -36,28 +37,33 @@ //! # } //! ``` -use std::collections::HashMap; -use std::sync::Arc; +use std::{collections::HashMap, sync::Arc}; use futures::StreamExt; use tracing::info; -use crate::config::Config; -use crate::error::Result; -use crate::index::PipelineOptions; -use crate::index::incremental::{self, IndexAction}; -use crate::metrics::MetricsHub; -use crate::retrieval::{PipelineRetriever, RetrieveEventReceiver}; -use crate::storage::{PersistedDocument, Workspace}; -use crate::{DocumentTree, Error}; - -use super::index_context::{IndexContext, IndexSource}; -use super::indexer::IndexerClient; -use super::query_context::{QueryContext, QueryScope}; -use super::retriever::RetrieverClient; -use super::types::{DocumentInfo, FailedItem, IndexItem, IndexMode, IndexResult, QueryResult}; -use super::workspace::WorkspaceClient; -use crate::events::EventEmitter; +use crate::{ + DocumentTree, Error, + config::Config, + error::Result, + events::EventEmitter, + index::{ + PipelineOptions, + incremental::{self, IndexAction}, + }, + metrics::MetricsHub, + 
retrieval::{PipelineRetriever, RetrieveEventReceiver}, + storage::{PersistedDocument, Workspace}, +}; + +use super::{ + index_context::{IndexContext, IndexSource}, + indexer::IndexerClient, + query_context::{QueryContext, QueryScope}, + retriever::RetrieverClient, + types::{DocumentInfo, FailedItem, IndexItem, IndexMode, IndexResult, QueryResult}, + workspace::WorkspaceClient, +}; /// The main Engine client. /// From 89de8e8735a7ca67913a4aa45341912eaa6fb2bf Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 19:16:42 +0800 Subject: [PATCH 11/21] feat(docs): update GitHub star component and redesign homepage - remove unused FaStar import from react-icons/fa - adjust GitHub star button styling with larger icon size (14px to 16px) - update spinner character from three dots to HTML entity … - fix stargazers link to point to correct repository path - modify font weights in styles module from 700 to 600 for consistency - redesign homepage layout with new typography and spacing - add dark theme support for all components - implement responsive design improvements - update navbar styling with new dimensions and colors - replace hero section content with emphasis on reasoning approach - enhance GitHub star button with improved styling and SVG icons - update CSS variables for better color scheme consistency BREAKING CHANGE: homepage layout structure has been completely redesigned --- docs/src/components/GitHubStar/index.tsx | 8 +- .../components/GitHubStar/styles.module.css | 4 +- docs/src/css/custom.css | 101 +++++++-- docs/src/pages/index.module.css | 202 ++++++++---------- docs/src/pages/index.tsx | 91 ++------ docs/src/theme/Navbar/index.tsx | 1 + docs/src/theme/Navbar/styles.module.css | 9 +- 7 files changed, 212 insertions(+), 204 deletions(-) diff --git a/docs/src/components/GitHubStar/index.tsx b/docs/src/components/GitHubStar/index.tsx index b2247874..712e8b74 100644 --- a/docs/src/components/GitHubStar/index.tsx +++ 
b/docs/src/components/GitHubStar/index.tsx @@ -1,5 +1,5 @@ import React, { useState, useEffect } from 'react'; -import { FaGithub, FaStar } from 'react-icons/fa'; +import { FaGithub } from 'react-icons/fa'; import styles from './styles.module.css'; function formatStars(count: number | null): string { @@ -40,16 +40,16 @@ export default function GitHubStar(): React.ReactElement { rel="noopener noreferrer" className={styles.githubStarButton} > - + Star {loading ? (
- ... +
) : (
-
- {siteConfig.title} -

- No vector database. No embeddings. No similarity search.
- Retrieve by reasoning, not by math. -

-
- - Get Started - - - GitHub - -
-
- -
-
- - - - Python -
- - {({tokens, getLineProps, getTokenProps}) => ( -
-                
-                  {tokens.map((line, i) => (
-                    
- {line.map((token, key) => ( - - ))} -
- ))} -
-
- )} -
+

+ Reason, + don't vector +

+

+ + Vectorless will reason through any of your structured documents — PDFs, Markdown, reports, contracts, + +
+ and retrieve only what's relevant. Nothing more, nothing less. +

+
+ + + Star on GitHub + +
diff --git a/docs/src/theme/Navbar/index.tsx b/docs/src/theme/Navbar/index.tsx index 35abb2a6..2437b65e 100644 --- a/docs/src/theme/Navbar/index.tsx +++ b/docs/src/theme/Navbar/index.tsx @@ -26,6 +26,7 @@ export default function Navbar(): React.ReactElement { alt={logo?.alt || title} /> +
Vectorless
{leftItems.map((item, i) => )} diff --git a/docs/src/theme/Navbar/styles.module.css b/docs/src/theme/Navbar/styles.module.css index c8d5283d..9e3c803e 100644 --- a/docs/src/theme/Navbar/styles.module.css +++ b/docs/src/theme/Navbar/styles.module.css @@ -27,10 +27,17 @@ } .navbarLogo { - height: 32px; + height: 40px; width: auto; } +.logo { + font-size: 1.6rem; + font-weight: 800; + letter-spacing: -0.02em; + color: var(--primary); +} + /* Center: navigation links */ .navbarCenter { flex: 1; From a7b04f50412629484d13567ba61e64eccd03face Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 19:48:12 +0800 Subject: [PATCH 12/21] feat(docs): replace feature grid with interactive demo card - Remove old feature grid component and styling - Add new demo card component with dark theme design - Implement tabbed interface for Python/Rust code examples - Add syntax highlighting for Python and Rust code blocks - Include copy-to-clipboard functionality for code snippets - Create terminal output simulation with blinking cursor animation - Add installation command section with copy button - Update homepage to use new get started section instead of features --- docs/src/pages/index.module.css | 249 +++++++++++++++++++++++++++----- docs/src/pages/index.tsx | 230 +++++++++++++++++++++++------ 2 files changed, 399 insertions(+), 80 deletions(-) diff --git a/docs/src/pages/index.module.css b/docs/src/pages/index.module.css index 54a243d4..aece1890 100644 --- a/docs/src/pages/index.module.css +++ b/docs/src/pages/index.module.css @@ -172,50 +172,224 @@ margin: 0 0 3rem; } -/* ===== Feature Grid ===== */ -.grid { - display: grid; - grid-template-columns: repeat(3, 1fr); +/* ===== Get Started — Linear Dark Demo Card ===== */ +.demoCard { + max-width: 1200px; + margin: 0 auto; + background: #121417; + border-radius: 16px; + border: 1px solid #23262B; + overflow: hidden; + box-shadow: 0 12px 40px rgba(0, 0, 0, 0.4); +} + +.demoTabs { + display: flex; + align-items: 
center; gap: 1.5rem; + padding: 0 1.25rem; + border-bottom: 1px solid #23262B; + background: #121417; } -.card { - padding: 1.75rem; - border-radius: 12px; - border: 1px solid var(--border); - background: var(--card-bg); - transition: border-color 0.2s, box-shadow 0.2s; +.demoTab { + padding: 0.875rem 0 0.75rem; + font-size: 0.8rem; + font-weight: 500; + color: #8E95A3; + border: none; + border-bottom: 2px solid transparent; + background: transparent; + cursor: pointer; + transition: all 0.15s ease; + letter-spacing: -0.2px; + font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; } -.card:hover { - border-color: var(--primary-light); - box-shadow: 0 4px 24px var(--primary-soft); +.demoTabActive { + color: #AF788B; + border-bottom-color: #AF788B; } -.cardIcon { - display: inline-flex; +.demoTab:not(.demoTabActive):hover { + color: #EBEDF0; +} + +.demoPanel { + background: #0B0D0E; +} + +.demoCodeHeader { + padding: 0.75rem 1.25rem; + background: #0B0D0E; + border-bottom: 1px solid #2A2E34; + display: flex; align-items: center; - justify-content: center; - width: 44px; - height: 44px; - font-size: 1.35rem; - border-radius: 10px; - background: var(--bg-offset); - margin-bottom: 1rem; + gap: 0.75rem; } -.cardTitle { - font-size: 1.05rem; - font-weight: 600; - margin: 0 0 0.5rem; - color: var(--text); +.windowDots { + display: flex; + gap: 0.5rem; } -.cardDesc { - font-size: 0.88rem; - line-height: 1.65; - color: var(--text-light); +.windowDot { + width: 11px; + height: 11px; + border-radius: 50%; + display: inline-block; +} + +.dotRed { + background: #ED6A5E; +} + +.dotYellow { + background: #F5BD4F; +} + +.dotGreen { + background: #61C454; +} + +.copyBtn { + margin-left: auto; + background: transparent; + border: none; + color: #8E95A3; + font-size: 0.7rem; + font-family: 'JetBrains Mono', 'Fira Code', monospace; + cursor: pointer; + padding: 0.25rem 0.75rem; + border-radius: 6px; + transition: all 0.15s; + letter-spacing: -0.2px; +} + +.copyBtn:hover { 
+ background: rgba(175, 120, 139, 0.1); + color: #AF788B; +} + +.demoPre { margin: 0; + padding: 1.75rem 2rem; + overflow-x: auto; + font-family: 'JetBrains Mono', 'Fira Code', 'SF Mono', Menlo, monospace; + font-size: 0.85rem; + line-height: 1.75; + color: #EBEDF0; + background: #0B0D0E; +} + +.demoPre code { + font-family: inherit; + background: transparent; + border: none; + padding: 0; + color: inherit; +} + +/* Syntax highlight tokens */ +.hlKeyword { + color: #AF788B; + font-weight: 500; +} + +.hlFunction { + color: #6DCDFF; +} + +.hlString { + color: #B0E57C; +} + +.hlComment { + color: #5E6673; + font-style: italic; +} + +.hlType { + color: #6DCDFF; +} + +.hlAttribute { + color: #D9A7E8; +} + +.terminalOutput { + background: #0B0D0E; + border-top: 1px solid #2A2E34; + padding: 1rem 2rem; + font-family: 'JetBrains Mono', 'Fira Code', monospace; + font-size: 0.78rem; + color: #8E95A3; + line-height: 1.7; +} + +.terminalPrompt { + color: #B0E57C; +} + +.terminalAnswer { + color: #EBEDF0; +} + +.terminalCursor { + display: inline-block; + width: 7px; + height: 13px; + background-color: #AF788B; + vertical-align: middle; + margin-left: 4px; + animation: cursorBlink 1s step-end infinite; +} + +@keyframes cursorBlink { + 0%, 100% { opacity: 1; } + 50% { opacity: 0; } +} + +.installBar { + padding: 1rem 2rem; + background: #121417; + border-top: 1px solid #23262B; + display: flex; + align-items: center; + justify-content: space-between; + flex-wrap: wrap; + gap: 0.75rem; +} + +.installCommand { + font-family: 'JetBrains Mono', 'Fira Code', monospace; + font-size: 0.75rem; + color: #8E95A3; + background: #0B0D0E; + padding: 0.4rem 1rem; + border-radius: 8px; + border: 1px solid #23262B; +} + +.installCommand span { + color: #AF788B; +} + +.installBtn { + background: #AF788B; + border: none; + color: white; + font-size: 0.75rem; + font-weight: 500; + padding: 0.4rem 1.2rem; + border-radius: 20px; + cursor: pointer; + transition: background 0.15s; + font-family: 
'Inter', -apple-system, BlinkMacSystemFont, sans-serif; +} + +.installBtn:hover { + background: #9A6A7C; } /* ===== How It Works ===== */ @@ -328,10 +502,6 @@ font-size: 1.15rem; } - .grid { - grid-template-columns: repeat(2, 1fr); - } - .steps { flex-direction: column; gap: 2.5rem; @@ -363,8 +533,13 @@ max-width: 280px; } - .grid { - grid-template-columns: 1fr; + .demoPre { + font-size: 0.65rem; + } + + .installBar { + flex-direction: column; + align-items: flex-start; } .ctaActions { diff --git a/docs/src/pages/index.tsx b/docs/src/pages/index.tsx index f1175ac7..dad389c6 100644 --- a/docs/src/pages/index.tsx +++ b/docs/src/pages/index.tsx @@ -1,4 +1,5 @@ import type {ReactNode} from 'react'; +import {useState, useMemo} from 'react'; import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; import Layout from '@theme/Layout'; import Heading from '@theme/Heading'; @@ -37,57 +38,200 @@ function HomepageHeader() { ); } -function SectionWhy() { - const items = [ - { - icon: '\u{1F9E0}', - title: 'Reasoning-Native', - desc: 'LLMs navigate hierarchical document trees with semantic understanding \u2014 not vector proximity.', - }, - { - icon: '\u{1F5C2}\u{FE0F}', - title: 'No Vector Database', - desc: 'Eliminate embedding pipelines, vector stores, and similarity search entirely. Trees are the index.', - }, - { - icon: '\u26A1', - title: 'Rust-Powered', - desc: 'Core engine in Rust with Python bindings. Arena-based trees, async I/O, and zero-copy traversal.', - }, - { - icon: '\u{1F50D}', - title: 'Multi-Algorithm Search', - desc: 'Beam search, MCTS, and greedy algorithms with LLM-guided Pilot at key decision points.', - }, - { - icon: '\u{1F4CA}', - title: 'Explainable Results', - desc: 'Full reasoning chain traces every navigation decision. Audit how and why content was retrieved.', - }, - { - icon: '\u{1F4C4}', - title: 'PDF & Markdown', - desc: 'Index PDFs and Markdown out of the box. 
Hierarchical structure extracted automatically.', - }, - ]; +/* ---- Regex-based syntax highlighter ---- */ +function highlight(code: string, lang: 'python' | 'rust'): ReactNode[] { + // Each rule has exactly ONE capture group in its regex + const rules: {re: RegExp; cls: string}[] = lang === 'python' + ? [ + {re: /(#.*)/g, cls: styles.hlComment}, + {re: /("(?:[^"\\]|\\.)*")/g, cls: styles.hlString}, + {re: /\b(import|from|async|def|await|return|as|with|for|in|if|else|None|True|False)\b/g, cls: styles.hlKeyword}, + {re: /\b([A-Z][A-Za-z0-9_]*)\b/g, cls: styles.hlType}, + {re: /\b([a-z_]\w*)\s*(?=\()/g, cls: styles.hlFunction}, + ] + : [ + {re: /(\/\/.*)/g, cls: styles.hlComment}, + {re: /("(?:[^"\\]|\\.)*")/g, cls: styles.hlString}, + {re: /\b(use|let|mut|fn|async|await|return|if|else|match|struct|impl|pub|mod|crate|self|super|where|for|in|loop|while|break|continue|move|ref|type|enum|trait|const|static|unsafe|extern)\b/g, cls: styles.hlKeyword}, + {re: /\b([A-Z][A-Za-z0-9_]*)\b/g, cls: styles.hlType}, + {re: /\b(\w+!)/g, cls: styles.hlFunction}, + {re: /\b([a-z_]\w*)\s*(?=\()/g, cls: styles.hlFunction}, + {re: /(#\[.*?\])/g, cls: styles.hlAttribute}, + ]; + + // Build combined regex — join the single capture-group sources directly + const combined = rules.map(r => r.re.source).join('|'); + const re = new RegExp(combined, 'gm'); + + const nodes: ReactNode[] = []; + let lastIdx = 0; + let m: RegExpExecArray | null; + re.lastIndex = 0; + + while ((m = re.exec(code)) !== null) { + if (m.index > lastIdx) { + nodes.push(code.slice(lastIdx, m.index)); + } + // match[1..rules.length] corresponds to each rule's capture group + for (let i = 0; i < rules.length; i++) { + const captured = m[i + 1]; + if (captured !== undefined) { + nodes.push({captured}); + break; + } + } + lastIdx = re.lastIndex; + } + if (lastIdx < code.length) { + nodes.push(code.slice(lastIdx)); + } + return nodes; +} + +// Exact code from README +const PYTHON_CODE = `import asyncio +from vectorless import 
Engine, IndexContext, QueryContext + +async def main(): + engine = Engine(api_key="sk-...", model="gpt-4o") + + # Index a document + result = await engine.index(IndexContext.from_path("./report.pdf")) + doc_id = result.doc_id + + # Query + result = await engine.query( + QueryContext("What is the total revenue?").with_doc_ids([doc_id]) + ) + print(result.single().content) + +asyncio.run(main())`; + +const RUST_CODE = `use vectorless::client::{EngineBuilder, IndexContext, QueryContext}; + +#[tokio::main] +async fn main() -> vectorless::Result<()> { + let engine = EngineBuilder::new() + .with_key("sk-...") + .with_model("gpt-4o") + .build() + .await?; + + // Index a document + let result = engine.index(IndexContext::from_path("./report.pdf")).await?; + let doc_id = result.doc_id().unwrap(); + + // Query + let result = engine.query( + QueryContext::new("What is the total revenue?") + .with_doc_ids(vec![doc_id.to_string()]) + ).await?; + println!("{}", result.content); + + Ok(()) +}`; + +function PythonCode() { + const nodes = useMemo(() => highlight(PYTHON_CODE, 'python'), []); + return
{nodes}
; +} + +function RustCode() { + const nodes = useMemo(() => highlight(RUST_CODE, 'rust'), []); + return
{nodes}
; +} + +function SectionGetStarted() { + const [activeTab, setActiveTab] = useState<'python' | 'rust'>('python'); + const [copyLabel, setCopyLabel] = useState('Copy'); + const [installLabel, setInstallLabel] = useState('Copy & install'); + + const installCmd = activeTab === 'python' ? 'pip install vectorless' : 'cargo add vectorless'; + + const handleCopy = () => { + const code = activeTab === 'python' ? PYTHON_CODE : RUST_CODE; + navigator.clipboard.writeText(code); + setCopyLabel('\u2713 Copied!'); + setTimeout(() => setCopyLabel('Copy'), 1500); + }; + + const handleInstallCopy = () => { + navigator.clipboard.writeText(installCmd); + setInstallLabel('\u2713 Copied!'); + setTimeout(() => setInstallLabel('Copy & install'), 1500); + }; return (
- Why Vectorless? + Get Started

- RAG without the baggage. + Just a few lines of code to get up and running.

-
- {items.map((item, i) => ( -
- {item.icon} - {item.title} -

{item.desc}

+
+ {/* Tabs */} +
+ + +
+ + {/* Python panel */} + {activeTab === 'python' && ( +
+
+
+ + + +
+ +
+ +
+ $ python demo.py
+ → The total revenue for fiscal year 2024 was $2.3 billion, a 15% increase YoY. + +
- ))} + )} + + {/* Rust panel */} + {activeTab === 'rust' && ( +
+
+
+ + + +
+ +
+ +
+ $ cargo run
+ → The total revenue for fiscal year 2024 was $2.3 billion, a 15% increase YoY. + +
+
+ )} + + {/* Install bar */} +
+
+ $ {installCmd} +
+ +
@@ -160,7 +304,7 @@ export default function Home(): ReactNode { description="Reasoning-native document intelligence engine. No vector database, no embeddings. Retrieve by reasoning.">
- +
From ce5192ad774f3080b8e95f90429eabad7f8306f9 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 19:57:14 +0800 Subject: [PATCH 13/21] docs(homepage): update how it works section with workflow diagram - replace step-by-step text explanation with visual workflow diagram - remove numbered steps component and associated CSS styles - add new workflow wrapper and image styling - update section title to "How does Vectorless work?" - add subtitle "You declare a few lines of code. We do everything else." - use SVG image for better visual representation of the process --- docs/src/pages/index.module.css | 45 ++++++++++----------------------- docs/src/pages/index.tsx | 25 +++++------------- 2 files changed, 20 insertions(+), 50 deletions(-) diff --git a/docs/src/pages/index.module.css b/docs/src/pages/index.module.css index aece1890..a67493de 100644 --- a/docs/src/pages/index.module.css +++ b/docs/src/pages/index.module.css @@ -393,44 +393,25 @@ } /* ===== How It Works ===== */ -.steps { - display: flex; - gap: 2rem; - max-width: 960px; +.workflowWrapper { + max-width: 100%; + padding: 0; margin: 0 auto; -} - -.step { - flex: 1; text-align: center; } -.stepNum { - display: inline-flex; - align-items: center; - justify-content: center; - width: 48px; - height: 48px; - border-radius: 50%; - background: var(--primary); - color: #fff; - font-size: 1rem; - font-weight: 700; - margin-bottom: 1.25rem; +.workflowImg { + width: 100%; + height: auto; + display: block; + min-height: 520px; + object-fit: contain; } -.stepTitle { - font-size: 1.1rem; - font-weight: 600; - margin: 0 0 0.5rem; - color: var(--text); -} - -.stepDesc { - font-size: 0.88rem; - line-height: 1.65; - color: var(--text-light); - margin: 0; +.workflowImg { + width: 100%; + height: auto; + display: block; } /* ===== CTA ===== */ diff --git a/docs/src/pages/index.tsx b/docs/src/pages/index.tsx index dad389c6..9af6b34f 100644 --- a/docs/src/pages/index.tsx +++ b/docs/src/pages/index.tsx @@ 
-239,28 +239,17 @@ function SectionGetStarted() { } function SectionHowItWorks() { - const steps = [ - { num: '01', title: 'Index', desc: 'Parse documents into hierarchical semantic trees with LLM-generated summaries.' }, - { num: '02', title: 'Navigate', desc: 'Pilot uses LLM to navigate the tree at key forks \u2014 beam search explores multiple paths in parallel.' }, - { num: '03', title: 'Retrieve', desc: 'Evaluate sufficiency and backtrack if needed. Aggregate only the most relevant content within budget.' }, - ]; - return ( -
+
- How It Works + How does Vectorless work? -
- {steps.map((step, i) => ( -
-
{step.num}
-
- {step.title} -

{step.desc}

-
-
- ))} +

+ You declare a few lines of code. We do everything else. +

+
+ How Vectorless works
From 3dced1b11a3ea99fd580217b862a6e3068143c32 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 20:27:32 +0800 Subject: [PATCH 14/21] feat(docs): add interactive use cases slider to homepage - Implement responsive slider component with smooth transitions and navigation controls - Add 6 detailed use cases showcasing financial reports, legal documents, technical documentation, research papers, cross-document analysis, and compliance scenarios - Create CSS module styles for slider track, cards, navigation buttons, and dot indicators with active states - Add keyboard-friendly navigation and screen reader accessibility - Include realistic example queries and answers for each use case - Make slider responsive with different card sizes for various screen widths - Add resize event listener to handle dynamic layout adjustments --- docs/src/pages/index.module.css | 143 ++++++++++++++++++++++++++++++-- docs/src/pages/index.tsx | 119 +++++++++++++++++++++++++- 2 files changed, 255 insertions(+), 7 deletions(-) diff --git a/docs/src/pages/index.module.css b/docs/src/pages/index.module.css index a67493de..6df9df3c 100644 --- a/docs/src/pages/index.module.css +++ b/docs/src/pages/index.module.css @@ -408,10 +408,138 @@ object-fit: contain; } -.workflowImg { +/* ===== Use Cases Slider ===== */ +.sliderOuter { + overflow: hidden; width: 100%; - height: auto; - display: block; + padding: 1rem 0; +} + +.sliderTrack { + display: flex; + gap: 1.5rem; + transition: transform 0.45s cubic-bezier(0.2, 0.9, 0.4, 1.1); + will-change: transform; +} + +.caseCard { + flex: 0 0 calc(65% - 0.75rem); + background: #121417; + border: 1px solid #23262B; + border-radius: 16px; + padding: 3rem 3rem 2.5rem; + opacity: 0.4; + transform: scale(0.94); + transition: all 0.35s ease; + filter: brightness(0.75); + min-height: 460px; + display: flex; + flex-direction: column; + justify-content: center; + gap: 0.5rem; +} + +.caseCardActive { + opacity: 1; + transform: scale(1); + 
filter: brightness(1); + border-color: #AF788B; + box-shadow: 0 12px 40px rgba(175, 120, 139, 0.18); +} + +.caseTitle { + font-size: 1.55rem; + font-weight: 600; + margin: 0 0 0.75rem; + color: #EBEDF0; +} + +.caseDesc { + color: #8E95A3; + font-size: 1.05rem; + line-height: 1.7; + margin: 0 0 1.75rem; +} + +.caseQuery { + background: #0B0D0E; + border-radius: 12px; + padding: 1.5rem 1.75rem; + font-family: 'JetBrains Mono', 'Fira Code', monospace; + font-size: 0.88rem; + color: #EBEDF0; + border: 1px solid #23262B; + line-height: 1.7; +} + +.caseQueryLabel { + color: #AF788B; + font-weight: 600; + margin-bottom: 0.5rem; +} + +.caseQueryText { + color: #EBEDF0; +} + +.caseAnswer { + color: #8E95A3; + margin-top: 0.75rem; + padding-top: 0.75rem; + border-top: 1px solid #23262B; + font-size: 0.75rem; +} + +.sliderNav { + display: flex; + justify-content: center; + align-items: center; + gap: 1rem; + margin-top: 2.5rem; +} + +.sliderBtn { + background: var(--card-bg); + border: 1px solid var(--border); + color: var(--text-light); + font-size: 1.2rem; + width: 44px; + height: 44px; + border-radius: 44px; + display: inline-flex; + align-items: center; + justify-content: center; + cursor: pointer; + transition: all 0.2s; +} + +.sliderBtn:hover { + border-color: var(--primary); + color: var(--primary-dark); + background: var(--primary-soft); +} + +.sliderDots { + display: flex; + gap: 0.5rem; +} + +.sliderDot { + width: 8px; + height: 8px; + border-radius: 8px; + background: var(--text-light); + border: none; + padding: 0; + cursor: pointer; + transition: all 0.25s; + opacity: 0.4; +} + +.sliderDotActive { + width: 28px; + background: var(--primary); + opacity: 1; } /* ===== CTA ===== */ @@ -483,9 +611,8 @@ font-size: 1.15rem; } - .steps { - flex-direction: column; - gap: 2.5rem; + .caseCard { + flex: 0 0 calc(70% - 0.75rem); } .section { @@ -514,6 +641,10 @@ max-width: 280px; } + .caseCard { + flex: 0 0 calc(90% - 0.75rem); + } + .demoPre { font-size: 0.65rem; } diff 
--git a/docs/src/pages/index.tsx b/docs/src/pages/index.tsx index 9af6b34f..e9d1b4e1 100644 --- a/docs/src/pages/index.tsx +++ b/docs/src/pages/index.tsx @@ -1,5 +1,5 @@ import type {ReactNode} from 'react'; -import {useState, useMemo} from 'react'; +import {useState, useMemo, useRef, useEffect, useCallback} from 'react'; import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; import Layout from '@theme/Layout'; import Heading from '@theme/Heading'; @@ -256,6 +256,122 @@ function SectionHowItWorks() { ); } +const USE_CASES = [ + { + title: 'Financial reports', + desc: 'Extract specific KPIs from 10\u2011K, annual reports, or earnings transcripts \u2014 even across fiscal years.', + query: '\u201cWhat was the net profit margin for Q3 2024?\u201d', + answer: '18.4%, up from 16.2% in Q3 2023. Source: Section 6.2, page 34.', + }, + { + title: 'Legal & contracts', + desc: 'Locate clauses, definitions, or obligations across complex agreements without missing cross\u2011references.', + query: '\u201cWhich sections define \u2018force majeure\u2019 and what are the notice requirements?\u201d', + answer: 'Section 12.3(a) + 12.3(b) \u2014 30\u2011day written notice required.', + }, + { + title: 'Technical docs', + desc: 'Navigate large API references, internal wikis, or on\u2011prem manuals with step\u2011by\u2011step reasoning.', + query: '\u201cHow to configure authentication for the WebSocket gateway?\u201d', + answer: 'See \u201cWebSocket Auth\u201d \u2192 section 4.2.1: use Authorization: Bearer .', + }, + { + title: 'Research papers', + desc: 'Cross\u2011reference findings, tables, or citations across arXiv preprints or internal literature.', + query: '\u201cWhat datasets were used for evaluation in Section 4?\u201d', + answer: 'Table 2: SQuAD, Natural Questions, and TriviaQA.', + }, + { + title: 'Cross\u2011document analysis', + desc: 'Compare metrics, definitions, or timelines across multiple reports in one query.', + query: '\u201cCompare R&D spending 
from 2023 vs 2024 annual reports.\u201d', + answer: '2023: $12.4M (page 9) \u00b7 2024: $15.1M (page 11) \u2192 +21.8% YoY.', + }, + { + title: 'Compliance & audit', + desc: 'Trace every retrieved statement back to its source \u2014 full explainability for regulated industries.', + query: '\u201cShow me all references to data retention policy.\u201d', + answer: 'Section 3.2 (page 8), Section 5.1 (page 14), and Appendix B.', + }, +]; + +function SectionUseCases() { + const [current, setCurrent] = useState(0); + const outerRef = useRef(null); + const trackRef = useRef(null); + const [offset, setOffset] = useState(0); + + const total = USE_CASES.length; + + const measure = useCallback(() => { + if (!outerRef.current || !trackRef.current) return; + const outerW = outerRef.current.offsetWidth; + const firstCard = trackRef.current.children[0] as HTMLElement; + if (!firstCard) return; + const cardW = firstCard.offsetWidth; + const gap = 24; // 1.5rem + const step = cardW + gap; + const newOffset = outerW / 2 - current * step - cardW / 2; + setOffset(newOffset); + }, [current]); + + useEffect(() => { + measure(); + window.addEventListener('resize', measure); + return () => window.removeEventListener('resize', measure); + }, [measure]); + + const prev = () => setCurrent(i => Math.max(0, i - 1)); + const next = () => setCurrent(i => Math.min(total - 1, i + 1)); + + return ( +
+
+ + Use cases · precision reasoning + +

+ Vectorless navigates through the structure of any document to retrieve exact context. +

+
+
+ {USE_CASES.map((c, i) => ( +
+ {c.title} +

{c.desc}

+
+
Query:
+
{c.query}
+
{c.answer}
+
+
+ ))} +
+
+
+ +
+ {USE_CASES.map((_, i) => ( +
+ +
+
+
+ ); +} + function SectionCTA() { return (
@@ -295,6 +411,7 @@ export default function Home(): ReactNode {
+
From 20960b88af414dae6465381e19d94d4fc939eea0 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 20:41:42 +0800 Subject: [PATCH 15/21] feat(docs): enhance CTA section with dark theme and installation cards - Add new dark themed CTA section with class sectionCtaDark - Implement installation command cards with copy functionality for pip and cargo - Add interactive copy buttons with success feedback animations - Update CTA title to "Start reasoning, not vectoring" - Style CTA elements with new typography and spacing - Add GitHub star button with custom styling - Implement responsive design for installation cards --- docs/src/pages/index.module.css | 110 +++++++++++++++++++++++++------- docs/src/pages/index.tsx | 43 +++++++++---- 2 files changed, 119 insertions(+), 34 deletions(-) diff --git a/docs/src/pages/index.module.css b/docs/src/pages/index.module.css index 6df9df3c..da48b6fd 100644 --- a/docs/src/pages/index.module.css +++ b/docs/src/pages/index.module.css @@ -543,41 +543,110 @@ } /* ===== CTA ===== */ +.sectionCtaDark { + background: #0B0D0E; + padding: 3rem 1.5rem; +} + .ctaBox { + max-width: 880px; + margin: 0 auto; text-align: center; padding: 4rem 2rem; - border-radius: 16px; - background: var(--bg-offset); - border: 1px solid var(--border); } .ctaTitle { - font-size: 2rem; + font-size: 2.5rem; font-weight: 700; - margin: 0 0 0.75rem; - color: var(--text); + letter-spacing: -0.02em; + margin: 0 0 1rem; + background: linear-gradient(135deg, #EBEDF0, #AF788B); + background-clip: text; + -webkit-background-clip: text; + -webkit-text-fill-color: transparent; + color: transparent; } .ctaDesc { - font-size: 1.1rem; - color: var(--text-light); - margin: 0 0 2rem; + font-size: 1.05rem; + color: #8E95A3; + max-width: 520px; + margin: 0 auto 2rem; + line-height: 1.6; } -.ctaDesc code { - padding: 0.2em 0.5em; - border-radius: 4px; - background: var(--card-bg); - border: 1px solid var(--border); - font-size: 0.95rem; - color: 
var(--primary-dark); +.ctaButtons { + display: flex; + justify-content: center; + gap: 1rem; + margin-bottom: 1.5rem; + flex-wrap: wrap; +} + +.ctaBtnSecondary { + padding: 0.7rem 1.8rem; + border-radius: 40px; + font-weight: 600; + font-size: 0.88rem; + background: transparent; + border: 1px solid #23262B; + color: #EBEDF0; + text-decoration: none; + cursor: pointer; + transition: all 0.2s; + font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; +} + +.ctaBtnSecondary:hover { + border-color: #AF788B; + background: rgba(175, 120, 139, 0.12); + color: #AF788B; + text-decoration: none; } -.ctaActions { +.ctaInstallCards { display: flex; - gap: 0.75rem; justify-content: center; + gap: 1rem; flex-wrap: wrap; + margin-bottom: 2rem; +} + +.ctaInstallItem { + background: #121417; + border: 1px solid #23262B; + border-radius: 12px; + padding: 0.65rem 1.25rem; + display: flex; + align-items: center; + gap: 1rem; +} + +.ctaInstallCommand { + font-family: 'JetBrains Mono', 'Fira Code', monospace; + font-size: 0.82rem; + color: #EBEDF0; +} + +.ctaInstallCommand span { + color: #AF788B; +} + +.ctaCopyIcon { + background: transparent; + border: none; + color: #8E95A3; + cursor: pointer; + padding: 0.25rem 0.65rem; + border-radius: 6px; + font-size: 0.7rem; + font-family: 'Inter', sans-serif; + transition: all 0.15s; +} + +.ctaCopyIcon:hover { + background: rgba(175, 120, 139, 0.12); + color: #AF788B; } /* ===== Dark theme overrides ===== */ @@ -654,11 +723,6 @@ align-items: flex-start; } - .ctaActions { - flex-direction: column; - align-items: center; - } - .sectionTitle { font-size: 1.5rem; } diff --git a/docs/src/pages/index.tsx b/docs/src/pages/index.tsx index e9d1b4e1..f0abd954 100644 --- a/docs/src/pages/index.tsx +++ b/docs/src/pages/index.tsx @@ -373,28 +373,49 @@ function SectionUseCases() { } function SectionCTA() { + const [pipLabel, setPipLabel] = useState('Copy'); + const [cargoLabel, setCargoLabel] = useState('Copy'); + + const handlePipCopy = () => { 
+ navigator.clipboard.writeText('pip install vectorless'); + setPipLabel('\u2713'); + setTimeout(() => setPipLabel('Copy'), 1500); + }; + + const handleCargoCopy = () => { + navigator.clipboard.writeText('cargo add vectorless'); + setCargoLabel('\u2713'); + setTimeout(() => setCargoLabel('Copy'), 1500); + }; + return ( -
+
- Start building in minutes + Start reasoning, not vectoring -

- pip install vectorless -

-
- - Read the Docs - +
- View on GitHub + + Star on GitHub +
+
+
+
$ pip install vectorless
+ +
+
+
$ cargo add vectorless
+ +
+
From 9d313ac707fcbf41406977442cc6ab12bc5dcda6 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 20:47:47 +0800 Subject: [PATCH 16/21] style(footer): adjust footer spacing and typography - Increase footer padding from 3rem 1.5rem 2rem to 5rem 1.5rem 3rem - Adjust font sizes for footer title (0.8rem -> 0.85rem), link items (0.88rem -> 0.92rem), and copyright text (0.8rem -> 0.82rem) - Update line height for footer links from 1.8 to 2 - Increase margin-bottom for footer title from 0.75rem to 1rem - Adjust copyright section margins and padding (2rem -> 3rem top margin, 1.5rem -> 2rem padding-top) style(landing): update gradient color scheme - Change hero section gradient from light colors (#EBEDF0, #AF788B) to darker palette (#1e293b, #8B5E6F) refactor(landing): remove dark section class from CTA component - Remove styles.sectionCtaDark class from section element in SectionCTA component --- docs/src/css/custom.css | 16 ++++++++-------- docs/src/pages/index.module.css | 2 +- docs/src/pages/index.tsx | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/src/css/custom.css b/docs/src/css/custom.css index 1e274af0..d2d29d94 100644 --- a/docs/src/css/custom.css +++ b/docs/src/css/custom.css @@ -127,22 +127,22 @@ /* ===== Footer ===== */ .footer { background-color: transparent !important; - padding: 3rem 1.5rem 2rem; + padding: 5rem 1.5rem 3rem; } .footer__title { - font-size: 0.8rem; + font-size: 0.85rem; font-weight: 700; text-transform: uppercase; letter-spacing: 0.08em; color: var(--text); - margin-bottom: 0.75rem; + margin-bottom: 1rem; } .footer__link-item { - font-size: 0.88rem; + font-size: 0.92rem; color: var(--text-light); - line-height: 1.8; + line-height: 2; transition: color 0.15s; } @@ -156,10 +156,10 @@ .footer__copyright { text-align: center; - font-size: 0.8rem; + font-size: 0.82rem; color: var(--text-light); letter-spacing: 0.03em; - margin-top: 2rem; - padding-top: 1.5rem; + margin-top: 3rem; + padding-top: 
2rem; border-top: 1px solid var(--border); } diff --git a/docs/src/pages/index.module.css b/docs/src/pages/index.module.css index da48b6fd..49ff3c3b 100644 --- a/docs/src/pages/index.module.css +++ b/docs/src/pages/index.module.css @@ -560,7 +560,7 @@ font-weight: 700; letter-spacing: -0.02em; margin: 0 0 1rem; - background: linear-gradient(135deg, #EBEDF0, #AF788B); + background: linear-gradient(135deg, #1e293b, #8B5E6F); background-clip: text; -webkit-background-clip: text; -webkit-text-fill-color: transparent; diff --git a/docs/src/pages/index.tsx b/docs/src/pages/index.tsx index f0abd954..b7bd3766 100644 --- a/docs/src/pages/index.tsx +++ b/docs/src/pages/index.tsx @@ -389,7 +389,7 @@ function SectionCTA() { }; return ( -
+
From 1496eb8a1640d1fe7f062b8e0833a0e0aa4332c7 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 21:42:45 +0800 Subject: [PATCH 17/21] fix(docs): update copyright symbol and footer styling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace copyright symbol from literal "©" to Unicode escape "\u00A9" - Reduce footer bottom padding from 3rem to 1.5rem - Adjust footer copyright styling: increase font size from 0.82rem to 0.88rem, remove center alignment, letter spacing, top margin, and border --- docs/docusaurus.config.ts | 2 +- docs/src/css/custom.css | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index 6b329f6e..fd576711 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -133,7 +133,7 @@ const config: Config = { ], }, ], - copyright: `Copyright © ${new Date().getFullYear()} Vectorless`, + copyright: `Copyright \u00A9 ${new Date().getFullYear()} Vectorless`, }, prism: { theme: prismThemes.github, diff --git a/docs/src/css/custom.css b/docs/src/css/custom.css index d2d29d94..b6f2ebc6 100644 --- a/docs/src/css/custom.css +++ b/docs/src/css/custom.css @@ -127,7 +127,7 @@ /* ===== Footer ===== */ .footer { background-color: transparent !important; - padding: 5rem 1.5rem 3rem; + padding: 5rem 1.5rem 1.5rem; } .footer__title { @@ -155,11 +155,6 @@ } .footer__copyright { - text-align: center; - font-size: 0.82rem; + font-size: 0.88rem; color: var(--text-light); - letter-spacing: 0.03em; - margin-top: 3rem; - padding-top: 2rem; - border-top: 1px solid var(--border); } From c281322092098bf1a8347a5284d2005ec033496e Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 22:00:39 +0800 Subject: [PATCH 18/21] docs(README): update description and restructure content - Replace the technical explanation of semantic tree navigation with clearer value proposition highlighting 
supported document types (PDFs, Markdown, reports, contracts) - Rename "Why Vectorless" section to "How It Works" for better clarity - Remove detailed technical explanation about semantic tree indexing approach --- README.md | 8 ++------ docs/docs/api-reference.mdx | 17 +++++++++++++++++ docs/docusaurus.config.ts | 2 +- docs/sidebars.ts | 1 + 4 files changed, 21 insertions(+), 7 deletions(-) create mode 100644 docs/docs/api-reference.mdx diff --git a/README.md b/README.md index 96d73505..33a0de9d 100644 --- a/README.md +++ b/README.md @@ -13,15 +13,11 @@
-**Vectorless** is a reasoning-native document engine designed to be the foundational layer for AI applications that need structured access to documents, with the core written in Rust. It does not use vector databases, embeddings, or similarity search. Instead, it transforms documents into hierarchical semantic trees and uses the LLM itself to navigate and retrieve — purely LLM-guided, from indexing to querying. +**Vectorless** is a reasoning-native document engine designed to be the foundational layer for AI applications that need structured access to documents, with the core written in Rust. It does not use vector databases, embeddings, or similarity search. Instead, it will reason through any of your structured documents — **PDFs, Markdown, reports, contracts** — and retrieve only what's relevant. Nothing more, nothing less. -## Why Vectorless - -Most document retrieval solutions rely on vector similarity — splitting documents into chunks, embedding them, and searching by cosine distance. This works for rough topic matching, but breaks down when you need **precision**: specific numbers, cross-section references, or multi-step reasoning across a document. - -Vectorless takes a different approach. No vectors at all. It builds a **semantic tree index** of each document — preserving the original hierarchy — and uses the LLM itself to navigate that structure. The LLM generates the tree during indexing and reasons through it during retrieval. Pure LLM guidance, end to end. +## How It Works
Vectorless Workflow diff --git a/docs/docs/api-reference.mdx b/docs/docs/api-reference.mdx new file mode 100644 index 00000000..5261afbf --- /dev/null +++ b/docs/docs/api-reference.mdx @@ -0,0 +1,17 @@ +--- +sidebar_position: 9 +title: API Reference +description: Complete API reference for Vectorless Rust crate and Python SDK. +--- + +# API Reference + +> This page is a work in progress. The full API reference will be published in a future update. + +In the meantime, you can refer to the following resources: + +- **Rust crate docs**: [docs.rs/vectorless](https://docs.rs/vectorless) — auto-generated documentation from source code +- **Python SDK docs**: Available via `help(vectorless)` in an interactive Python session +- **Source code**: [github.com/vectorlessflow/vectorless](https://github.com/vectorlessflow/vectorless) + +For usage examples, see [Quick Query](/docs/examples/quick-query), [Multi-Document](/docs/examples/multi-document), and [Batch Indexing](/docs/examples/batch-indexing). 
diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index fd576711..b72d033d 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -111,7 +111,7 @@ const config: Config = { }, { label: 'API Reference', - href: 'https://docs.rs/vectorless', + to: '/docs/api-reference', }, ], }, diff --git a/docs/sidebars.ts b/docs/sidebars.ts index e3ddf067..2f70bedb 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -42,6 +42,7 @@ const sidebars: SidebarsConfig = { 'sdk/rust', ], }, + 'api-reference', { type: 'category', label: 'Examples', From 851b80c9ed6c5e43f0a24dbaebb584bf60839525 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 22:03:01 +0800 Subject: [PATCH 19/21] style(docs): update heading text color to use CSS variable - Replace gradient text effect with standard CSS variable - Remove unnecessary background properties for better maintainability - Use consistent text color through --text variable --- docs/src/pages/index.module.css | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/src/pages/index.module.css b/docs/src/pages/index.module.css index 49ff3c3b..9224cf86 100644 --- a/docs/src/pages/index.module.css +++ b/docs/src/pages/index.module.css @@ -560,11 +560,7 @@ font-weight: 700; letter-spacing: -0.02em; margin: 0 0 1rem; - background: linear-gradient(135deg, #1e293b, #8B5E6F); - background-clip: text; - -webkit-background-clip: text; - -webkit-text-fill-color: transparent; - color: transparent; + color: var(--text); } .ctaDesc { From 2bb763248b3b8627200be1daaaecae207f2289cf Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 22:12:12 +0800 Subject: [PATCH 20/21] feat(engine): add endpoint parameter support for API configuration - Add with_endpoint method to EngineBuilder in Rust implementation - Include endpoint parameter in Python Engine constructor - Update documentation examples to show endpoint configuration - Set default OpenAI endpoint to 
https://api.openai.com/v1 fix(docs): update hero title emphasis color to use text variable - Change color from --primary-dark to --text in index module CSS - Maintain consistent styling with the rest of the documentation --- README.md | 5 +++-- docs/src/pages/index.module.css | 2 +- docs/src/pages/index.tsx | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 33a0de9d..556ba5f3 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ async fn main() -> vectorless::Result<()> { let engine = EngineBuilder::new() .with_key("sk-...") .with_model("gpt-4o") + .with_endpoint("https://api.openai.com/v1") .build() .await?; @@ -73,7 +74,7 @@ import asyncio from vectorless import Engine, IndexContext, QueryContext async def main(): - engine = Engine(api_key="sk-...", model="gpt-4o") + engine = Engine(api_key="sk-...", model="gpt-4o", endpoint="https://api.openai.com/v1") # Index a document result = await engine.index(IndexContext.from_path("./report.pdf")) @@ -126,7 +127,7 @@ result = await engine.query( Indexed documents are stored in a workspace — there's no need to reprocess files between sessions: ```python -engine = Engine(api_key="sk-...", model="gpt-4o") +engine = Engine(api_key="sk-...", model="gpt-4o", endpoint="https://api.openai.com/v1") # List all indexed documents docs = await engine.list() diff --git a/docs/src/pages/index.module.css b/docs/src/pages/index.module.css index 9224cf86..f2685eb8 100644 --- a/docs/src/pages/index.module.css +++ b/docs/src/pages/index.module.css @@ -32,7 +32,7 @@ } .heroTitleEmphasis { - color: var(--primary-dark); + color: var(--text); } .heroTitleLight { diff --git a/docs/src/pages/index.tsx b/docs/src/pages/index.tsx index b7bd3766..c75abfd7 100644 --- a/docs/src/pages/index.tsx +++ b/docs/src/pages/index.tsx @@ -93,7 +93,7 @@ const PYTHON_CODE = `import asyncio from vectorless import Engine, IndexContext, QueryContext async def main(): - engine = Engine(api_key="sk-...", model="gpt-4o") 
+ engine = Engine(api_key="sk-...", model="gpt-4o", endpoint="https://api.openai.com/v1") # Index a document result = await engine.index(IndexContext.from_path("./report.pdf")) @@ -114,6 +114,7 @@ async fn main() -> vectorless::Result<()> { let engine = EngineBuilder::new() .with_key("sk-...") .with_model("gpt-4o") + .with_endpoint("https://api.openai.com/v1") .build() .await?; From 3939faa20dbce7a8050e574d191c7cc9330b52c4 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Thu, 16 Apr 2026 22:17:46 +0800 Subject: [PATCH 21/21] chore(release): bump version to 0.1.29 workspace and 0.1.8 python package - Update workspace package version from 0.1.28 to 0.1.29 in Cargo.toml - Update python package version from 0.1.7 to 0.1.8 in pyproject.toml --- Cargo.toml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1a626bab..cd8fca41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["rust", "python"] resolver = "2" [workspace.package] -version = "0.1.28" +version = "0.1.29" edition = "2024" authors = ["zTgx "] license = "Apache-2.0" diff --git a/pyproject.toml b/pyproject.toml index f752a6ea..9d83bdd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "vectorless" -version = "0.1.7" +version = "0.1.8" description = "Reasoning-native document intelligence engine for AI" readme = "README.md" requires-python = ">=3.9"