diff --git a/config.example.toml b/config.example.toml deleted file mode 100644 index dd0a9a93..00000000 --- a/config.example.toml +++ /dev/null @@ -1,254 +0,0 @@ -# Vectorless Configuration Example -# Copy this file to vectorless.toml and fill in your API keys -# -# All configuration is loaded from this file only. -# No environment variables are used - this ensures explicit, traceable configuration. - -# ============================================================================ -# LLM Configuration (Unified) -# ============================================================================ -# -# The LLM pool allows configuring different models for different purposes: -# - summary: Used for generating document summaries during indexing -# - retrieval: Used for retrieval decisions and content evaluation -# - pilot: Used for intelligent navigation guidance -# -# Each client can have its own model, endpoint, and settings. - -[llm] -# Default API key (used by all clients unless overridden per-client) -api_key = "sk-your-api-key-here" - -# Summary client - generates document summaries during indexing -# Use a fast, cheap model for bulk processing -[llm.summary] -model = "gpt-4o-mini" -endpoint = "https://api.openai.com/v1" -max_tokens = 200 -temperature = 0.0 -# api_key = "sk-specific-key-for-summary" # Optional: override default - -# Retrieval client - used for retrieval decisions and content evaluation -# Can use a more capable model for better decisions -[llm.retrieval] -model = "gpt-4o" -endpoint = "https://api.openai.com/v1" -max_tokens = 100 -temperature = 0.0 -# api_key = "sk-specific-key-for-retrieval" # Optional: override default - -# Pilot client - used for intelligent navigation guidance -# Use a fast model for quick navigation decisions -[llm.pilot] -model = "gpt-4o-mini" -endpoint = "https://api.openai.com/v1" -max_tokens = 300 -temperature = 0.0 -# api_key = "sk-specific-key-for-pilot" # Optional: override default - -# Retry configuration (applies to all 
LLM calls) -[llm.retry] -max_attempts = 3 -initial_delay_ms = 500 -max_delay_ms = 30000 -multiplier = 2.0 -retry_on_rate_limit = true - -# Throttle/rate limiting configuration (applies to all LLM calls) -[llm.throttle] -max_concurrent_requests = 10 -requests_per_minute = 500 -enabled = true -semaphore_enabled = true - -# Fallback configuration (applies to all LLM calls) -[llm.fallback] -enabled = true -models = ["gpt-4o-mini", "glm-4-flash"] -# Alternative endpoints for fallback -# endpoints = [ -# "https://api.openai.com/v1", -# "https://api.z.ai/api/paas/v4" -# ] -on_rate_limit = "retry_then_fallback" -on_timeout = "retry_then_fallback" -on_all_failed = "return_error" - -# ============================================================================ -# Metrics Configuration (Unified) -# ============================================================================ - -[metrics] -enabled = true -storage_path = "./workspace/metrics" -retention_days = 30 - -[metrics.llm] -track_tokens = true -track_latency = true -track_cost = true -cost_per_1k_input_tokens = 0.00015 # gpt-4o-mini pricing -cost_per_1k_output_tokens = 0.0006 - -[metrics.pilot] -track_decisions = true -track_accuracy = true -track_feedback = true - -[metrics.retrieval] -track_paths = true -track_scores = true -track_iterations = true -track_cache = true - -# ============================================================================ -# Pilot Configuration -# ============================================================================ - -[pilot] -mode = "Balanced" # Aggressive | Balanced | Conservative | AlgorithmOnly -guide_at_start = true -guide_at_backtrack = true - -[pilot.budget] -max_tokens_per_query = 2000 -max_tokens_per_call = 500 -max_calls_per_query = 5 -max_calls_per_level = 2 -hard_limit = true - -[pilot.intervention] -fork_threshold = 3 -score_gap_threshold = 0.15 -low_score_threshold = 0.3 -max_interventions_per_level = 2 - -[pilot.feedback] -enabled = true -storage_path = 
"./workspace/feedback" -learning_rate = 0.1 -min_samples_for_learning = 10 - -# ============================================================================ -# Retrieval Configuration -# ============================================================================ - -[retrieval] -model = "gpt-4o" -endpoint = "https://api.openai.com/v1" -top_k = 3 -max_tokens = 1000 -temperature = 0.0 - -[retrieval.search] -top_k = 5 -beam_width = 3 -max_iterations = 10 -min_score = 0.1 - -[retrieval.sufficiency] -min_tokens = 500 -target_tokens = 2000 -max_tokens = 4000 -min_content_length = 200 -confidence_threshold = 0.7 - -[retrieval.cache] -max_entries = 1000 -ttl_secs = 3600 - -[retrieval.strategy] -exploration_weight = 1.414 -similarity_threshold = 0.5 -high_similarity_threshold = 0.8 -low_similarity_threshold = 0.3 - -# Hybrid Strategy Configuration (BM25 + LLM refinement) -# Recommended for most use cases - reduces LLM calls while maintaining accuracy -[retrieval.strategy.hybrid] -enabled = true -pre_filter_ratio = 0.3 # Keep top 30% of BM25 candidates -min_candidates = 2 # Minimum candidates to pass to LLM -max_candidates = 5 # Maximum candidates for LLM refinement -auto_accept_threshold = 0.85 # BM25 score for auto-accept (skip LLM) -auto_reject_threshold = 0.15 # BM25 score for auto-reject (skip LLM) -bm25_weight = 0.4 # Weight for BM25 score in final scoring -llm_weight = 0.6 # Weight for LLM score in final scoring - -# Cross-Document Retrieval Configuration -# For searching across multiple documents simultaneously -[retrieval.strategy.cross_document] -enabled = true -max_documents = 10 # Maximum documents to search -max_results_per_doc = 3 # Maximum results per document -max_total_results = 10 # Maximum total results -min_score = 0.3 # Minimum score threshold -merge_strategy = "TopK" # TopK | BestPerDocument | WeightedByRelevance -parallel_search = true # Search documents in parallel - -# Page-Range Strategy Configuration -# For filtering by page range before retrieval 
-[retrieval.strategy.page_range] -enabled = true -include_boundary_nodes = true # Include nodes spanning across boundary -expand_context_pages = 0 # Expand range by N pages for context -min_overlap_ratio = 0.1 # Minimum overlap ratio for node inclusion - -[retrieval.content] -enabled = true -token_budget = 4000 -min_relevance_score = 0.2 -scoring_strategy = "hybrid" # keyword | bm25 | hybrid -output_format = "markdown" -include_scores = false -hierarchical_min_per_level = 0.1 -deduplicate = true -dedup_threshold = 0.9 - -# ============================================================================ -# Multi-turn Retrieval Configuration -# ============================================================================ - -[retrieval.multiturn] -enabled = true -max_sub_queries = 3 -decomposition_model = "gpt-4o-mini" -aggregation_strategy = "merge" # merge | rank | synthesize - -# ============================================================================ -# Reference Following Configuration -# ============================================================================ - -[retrieval.reference] -enabled = true -max_depth = 3 -max_references = 10 -follow_pages = true -follow_tables_figures = true -min_confidence = 0.5 - -# ============================================================================ -# Storage Configuration -# ============================================================================ - -[storage] -workspace_dir = "./workspace" -cache_size = 100 -atomic_writes = true -file_lock = true -checksum_enabled = true - -[storage.compression] -enabled = false -algorithm = "gzip" -level = 6 - -# ============================================================================ -# Indexer Configuration -# ============================================================================ - -[indexer] -subsection_threshold = 300 -max_segment_tokens = 3000 -max_summary_tokens = 200 -min_summary_tokens = 20 diff --git a/python/src/lib.rs b/python/src/lib.rs index 
a5da45bb..4a743842 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -15,6 +15,7 @@ use ::vectorless::client::{ }; use ::vectorless::error::Error as RustError; use ::vectorless::metrics::IndexMetrics; +use ::vectorless::StrategyPreference; // ============================================================ // Error Types @@ -266,6 +267,83 @@ impl PyIndexContext { } } +// ============================================================ +// StrategyPreference +// ============================================================ + +/// Retrieval strategy preference. +/// +/// Controls how the engine searches the document tree. +/// +/// ```python +/// from vectorless import QueryContext, StrategyPreference +/// +/// # Force keyword-only (fastest, no LLM calls during search) +/// ctx = QueryContext("revenue").with_doc_id(doc_id).with_strategy(StrategyPreference.KEYWORD) +/// +/// # Force LLM-guided navigation (most accurate, uses more tokens) +/// ctx = QueryContext("explain the architecture").with_doc_id(doc_id).with_strategy(StrategyPreference.LLM) +/// +/// # Force hybrid (BM25 + LLM refinement) +/// ctx = QueryContext("growth trends").with_doc_id(doc_id).with_strategy(StrategyPreference.HYBRID) +/// ``` +#[pyclass(name = "StrategyPreference", skip_from_py_object)] +#[derive(Clone)] +pub struct PyStrategyPreference { + inner: StrategyPreference, +} + +#[pymethods] +impl PyStrategyPreference { + /// Auto-select based on query complexity (default). + #[classattr] + const AUTO: PyStrategyPreference = PyStrategyPreference { + inner: StrategyPreference::Auto, + }; + + /// Force keyword-based strategy (fast, no LLM during search). + #[classattr] + const KEYWORD: PyStrategyPreference = PyStrategyPreference { + inner: StrategyPreference::ForceKeyword, + }; + + /// Force LLM-guided navigation (deep reasoning). + #[classattr] + const LLM: PyStrategyPreference = PyStrategyPreference { + inner: StrategyPreference::ForceLlm, + }; + + /// Force hybrid strategy (BM25 + LLM refinement). 
+ #[classattr] + const HYBRID: PyStrategyPreference = PyStrategyPreference { + inner: StrategyPreference::ForceHybrid, + }; + + /// Force cross-document strategy (multi-document retrieval). + #[classattr] + const CROSS_DOCUMENT: PyStrategyPreference = PyStrategyPreference { + inner: StrategyPreference::ForceCrossDocument, + }; + + /// Force page-range strategy (filter by page range). + #[classattr] + const PAGE_RANGE: PyStrategyPreference = PyStrategyPreference { + inner: StrategyPreference::ForcePageRange, + }; + + fn __repr__(&self) -> String { + let name = match self.inner { + StrategyPreference::Auto => "AUTO", + StrategyPreference::ForceKeyword => "KEYWORD", + StrategyPreference::ForceLlm => "LLM", + StrategyPreference::ForceHybrid => "HYBRID", + StrategyPreference::ForceCrossDocument => "CROSS_DOCUMENT", + StrategyPreference::ForcePageRange => "PAGE_RANGE", + }; + format!("StrategyPreference.{}", name) + } +} + // ============================================================ // QueryContext // ============================================================ @@ -335,6 +413,15 @@ impl PyQueryContext { Self { inner: ctx } } + /// Set the retrieval strategy. + /// + /// Args: + /// strategy: A StrategyPreference constant, e.g. StrategyPreference.LLM. 
+ fn with_strategy(&self, strategy: &PyStrategyPreference) -> Self { + let ctx = self.inner.clone().with_strategy(strategy.inner); + Self { inner: ctx } + } + fn __repr__(&self) -> String { "QueryContext(...)".to_string() } @@ -1169,6 +1256,7 @@ fn _vectorless(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/python/vectorless/__init__.py b/python/vectorless/__init__.py index 4d66e2ca..c046ed90 100644 --- a/python/vectorless/__init__.py +++ b/python/vectorless/__init__.py @@ -26,9 +26,11 @@ IndexOptions, IndexResult, IndexItem, + IndexMetrics, QueryContext, QueryResult, QueryResultItem, + StrategyPreference, DocumentInfo, DocumentGraph, DocumentGraphNode, @@ -46,9 +48,11 @@ "IndexOptions", "IndexResult", "IndexItem", + "IndexMetrics", "QueryContext", "QueryResult", "QueryResultItem", + "StrategyPreference", "DocumentInfo", "DocumentGraph", "DocumentGraphNode", diff --git a/rust/examples/advanced.rs b/rust/examples/advanced.rs index a75608d1..1316a68d 100644 --- a/rust/examples/advanced.rs +++ b/rust/examples/advanced.rs @@ -29,7 +29,7 @@ async fn main() -> vectorless::Result<()> { // The config file must include api_key and model. // If environment variables are set, they override the config file values. let mut builder = EngineBuilder::new().with_config_path("./config.toml"); - + // Override config with env vars if present if let Ok(api_key) = std::env::var("LLM_API_KEY") { builder = builder.with_key(&api_key); @@ -72,4 +72,4 @@ async fn main() -> vectorless::Result<()> { println!("\n=== Done ==="); Ok(()) -} \ No newline at end of file +} diff --git a/rust/examples/events.rs b/rust/examples/events.rs index b0433dc7..a0cefbb2 100644 --- a/rust/examples/events.rs +++ b/rust/examples/events.rs @@ -100,12 +100,10 @@ async fn main() -> Result<(), Box> { // Build engine with LLM configuration from environment or defaults. 
// Adjust the defaults below to match your setup. - let api_key = std::env::var("LLM_API_KEY") - .unwrap_or_else(|_| "sk-...".to_string()); - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "gpt-4o".to_string()); - let endpoint = std::env::var("LLM_ENDPOINT") - .unwrap_or_else(|_| "https://api.openai.com/v1".to_string()); + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-...".to_string()); + let model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gpt-4o".to_string()); + let endpoint = + std::env::var("LLM_ENDPOINT").unwrap_or_else(|_| "https://api.openai.com/v1".to_string()); // 2. Create engine with events println!("Step 2: Creating engine with event emitter..."); @@ -130,10 +128,7 @@ async fn main() -> Result<(), Box> { // 4. Query with events println!("Step 4: Querying (with events)..."); let result = engine - .query( - QueryContext::new("What is vectorless?") - .with_doc_id(&doc_id) - ) + .query(QueryContext::new("What is vectorless?").with_doc_id(&doc_id)) .await?; if let Some(item) = result.single() { println!(" ✓ Found result ({} chars)", item.content.len()); @@ -145,7 +140,10 @@ async fn main() -> Result<(), Box> { // 5. Stats println!("\n--- Stats ---"); - println!(" Documents indexed: {}", index_count.load(Ordering::SeqCst)); + println!( + " Documents indexed: {}", + index_count.load(Ordering::SeqCst) + ); println!(" Queries executed: {}", query_count.load(Ordering::SeqCst)); println!(" Nodes visited: {}", nodes_visited.load(Ordering::SeqCst)); diff --git a/rust/examples/flow.rs b/rust/examples/flow.rs index 4778bd44..36712dd3 100644 --- a/rust/examples/flow.rs +++ b/rust/examples/flow.rs @@ -61,12 +61,9 @@ async fn main() -> vectorless::Result<()> { // Build engine with LLM configuration from environment or defaults. // Adjust the defaults below to match your setup. 
- let api_key = std::env::var("LLM_API_KEY") - .unwrap_or_else(|_| "sk-...".to_string()); - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "gpt-4o".to_string()); - let endpoint = std::env::var("LLM_ENDPOINT") - .unwrap_or_else(|_| "https://api".to_string()); + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-...".to_string()); + let model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gpt-4o".to_string()); + let endpoint = std::env::var("LLM_ENDPOINT").unwrap_or_else(|_| "https://api".to_string()); // Step 1: Create a Vectorless client println!("Step 1: Creating Vectorless client..."); diff --git a/rust/examples/graph.rs b/rust/examples/graph.rs index ac87a673..940bf7ee 100644 --- a/rust/examples/graph.rs +++ b/rust/examples/graph.rs @@ -29,10 +29,8 @@ async fn main() -> vectorless::Result<()> { // Build engine with LLM configuration from environment or defaults. // Adjust the defaults below to match your setup. - let api_key = std::env::var("LLM_API_KEY") - .unwrap_or_else(|_| "sk-...".to_string()); - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "gpt-4o".to_string()); + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-...".to_string()); + let model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gpt-4o".to_string()); // 1. Create engine let engine = EngineBuilder::new() @@ -106,4 +104,4 @@ async fn main() -> vectorless::Result<()> { println!("\n=== Done ==="); Ok(()) -} \ No newline at end of file +} diff --git a/rust/examples/index_incremental.rs b/rust/examples/index_incremental.rs index 32254d7d..b85a01e9 100644 --- a/rust/examples/index_incremental.rs +++ b/rust/examples/index_incremental.rs @@ -21,10 +21,9 @@ async fn main() -> vectorless::Result<()> { // Build engine with LLM configuration from environment or defaults. // Adjust the defaults below to match your setup. 
- let api_key = std::env::var("LLM_API_KEY") - .unwrap_or_else(|_| "sk-or-v1-...".to_string()); - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-or-v1-...".to_string()); + let model = + std::env::var("LLM_MODEL").unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); let endpoint = std::env::var("LLM_ENDPOINT") .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string()); @@ -66,12 +65,19 @@ Deletes a user by their unique identifier. // 1. Initial full index println!("--- Initial index ---"); let result = engine - .index(IndexContext::from_content(content_v1, DocumentFormat::Markdown)) + .index(IndexContext::from_content( + content_v1, + DocumentFormat::Markdown, + )) .await?; let doc_id = result.items[0].doc_id.clone(); if let Some(m) = &result.items[0].metrics { - println!("indexed in {}ms, {} nodes", m.total_time_ms(), m.nodes_processed); + println!( + "indexed in {}ms, {} nodes", + m.total_time_ms(), + m.nodes_processed + ); } // 2. Re-index unchanged content (incremental) — skips processing @@ -98,7 +104,11 @@ Deletes a user by their unique identifier. for item in &result.items { if let Some(m) = &item.metrics { - println!("updated in {}ms, {} nodes", m.total_time_ms(), m.nodes_processed); + println!( + "updated in {}ms, {} nodes", + m.total_time_ms(), + m.nodes_processed + ); } } @@ -110,4 +120,4 @@ Deletes a user by their unique identifier. 
} Ok(()) -} \ No newline at end of file +} diff --git a/rust/examples/index_pdf.rs b/rust/examples/index_pdf.rs index b370b39d..d8d8b57c 100644 --- a/rust/examples/index_pdf.rs +++ b/rust/examples/index_pdf.rs @@ -49,8 +49,8 @@ async fn main() -> vectorless::Result<()> { std::process::exit(1); } }; - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); + let model = + std::env::var("LLM_MODEL").unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); let endpoint = std::env::var("LLM_ENDPOINT") .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string()); @@ -70,9 +70,7 @@ async fn main() -> vectorless::Result<()> { .await .map_err(|e| vectorless::Error::Config(e.to_string()))?; - let result = engine - .index(IndexContext::from_path(pdf_path)) - .await?; + let result = engine.index(IndexContext::from_path(pdf_path)).await?; println!( "Indexed: {}, Failed: {}", diff --git a/rust/examples/index_single.rs b/rust/examples/index_single.rs index 55ec52d5..623b4cb3 100644 --- a/rust/examples/index_single.rs +++ b/rust/examples/index_single.rs @@ -21,10 +21,9 @@ async fn main() -> vectorless::Result<()> { // Build engine with LLM configuration from environment or defaults. // Adjust the defaults below to match your setup. 
- let api_key = std::env::var("LLM_API_KEY") - .unwrap_or_else(|_| "sk-or-v1-...".to_string()); - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-or-v1-...".to_string()); + let model = + std::env::var("LLM_MODEL").unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); let endpoint = std::env::var("LLM_ENDPOINT") .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string()); @@ -78,7 +77,10 @@ Monitoring is implemented using a Prometheus and Grafana stack, with custom metr // Index from content string let result = engine - .index(IndexContext::from_content(content, DocumentFormat::Markdown)) + .index(IndexContext::from_content( + content, + DocumentFormat::Markdown, + )) .await?; for item in &result.items { @@ -99,4 +101,4 @@ Monitoring is implemented using a Prometheus and Grafana stack, with custom metr } Ok(()) -} \ No newline at end of file +} diff --git a/rust/examples/indexing.rs b/rust/examples/indexing.rs index e4489d29..ee77e5f2 100644 --- a/rust/examples/indexing.rs +++ b/rust/examples/indexing.rs @@ -21,10 +21,9 @@ async fn main() -> vectorless::Result<()> { // Build engine with LLM configuration from environment or defaults. // Adjust the defaults below to match your setup. - let api_key = std::env::var("LLM_API_KEY") - .unwrap_or_else(|_| "sk-or-v1-...".to_string()); - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-or-v1-...".to_string()); + let model = + std::env::var("LLM_MODEL").unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); let endpoint = std::env::var("LLM_ENDPOINT") .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string()); @@ -40,8 +39,7 @@ async fn main() -> vectorless::Result<()> { // Index multiple documents in a single call. 
// Paths are resolved relative to the workspace directory. let result = engine - .index( - IndexContext::from_paths(&["../README.md", "../CLAUDE.md"])) + .index(IndexContext::from_paths(&["../README.md", "../CLAUDE.md"])) .await?; println!("Indexed {} document(s)", result.items.len()); @@ -59,4 +57,4 @@ async fn main() -> vectorless::Result<()> { } Ok(()) -} \ No newline at end of file +} diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs index 1bc9b927..d042d6aa 100644 --- a/rust/src/client/builder.rs +++ b/rust/src/client/builder.rs @@ -403,12 +403,12 @@ impl EngineBuilder { // Apply individual overrides if let Some(api_key) = self.api_key { - // Set API key for both retrieval and summary + // Set API key for both retrieval and index config.retrieval.api_key = Some(api_key.clone()); config.summary.api_key = Some(api_key); // Also set LLM pool config - if config.llm.summary.api_key.is_none() { - config.llm.summary.api_key = config.summary.api_key.clone(); + if config.llm.index.api_key.is_none() { + config.llm.index.api_key = config.summary.api_key.clone(); } if config.llm.retrieval.api_key.is_none() { config.llm.retrieval.api_key = config.summary.api_key.clone(); diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs index cb88a1dc..c46fe7c8 100644 --- a/rust/src/client/engine.rs +++ b/rust/src/client/engine.rs @@ -166,7 +166,11 @@ impl Engine { return Err(Error::Config(format!( "All {} source(s) failed to index: {}", failed.len(), - failed.iter().map(|f| format!("{} ({})", f.source, f.error)).collect::>().join("; ") + failed + .iter() + .map(|f| format!("{} ({})", f.source, f.error)) + .collect::>() + .join("; ") ))); } if !items.is_empty() { @@ -184,20 +188,21 @@ impl Engine { .max_concurrent_requests .min(ctx.sources.len()); - let results: Vec<(Vec, Vec)> = futures::stream::iter(ctx.sources.iter().cloned()) - .map(|source| { - let options = ctx.options.clone(); - let name = ctx.name.clone(); - let engine = self.clone(); - async 
move { - engine - .process_source(&source, &options, name.as_deref()) - .await - } - }) - .buffer_unordered(concurrency) - .collect() - .await; + let results: Vec<(Vec, Vec)> = + futures::stream::iter(ctx.sources.iter().cloned()) + .map(|source| { + let options = ctx.options.clone(); + let name = ctx.name.clone(); + let engine = self.clone(); + async move { + engine + .process_source(&source, &options, name.as_deref()) + .await + } + }) + .buffer_unordered(concurrency) + .collect() + .await; let mut items = Vec::new(); let mut failed = Vec::new(); @@ -210,7 +215,11 @@ impl Engine { return Err(Error::Config(format!( "All {} source(s) failed to index: {}", failed.len(), - failed.iter().map(|f| format!("{} ({})", f.source, f.error)).collect::>().join("; ") + failed + .iter() + .map(|f| format!("{} ({})", f.source, f.error)) + .collect::>() + .join("; ") ))); } @@ -416,7 +425,11 @@ impl Engine { } }; - match self.retriever.query_with_reasoning_index(&tree, &ctx.query, &options, reasoning_index).await { + match self + .retriever + .query_with_reasoning_index(&tree, &ctx.query, &options, reasoning_index) + .await + { Ok(mut result) => { result.doc_id = doc_id; items.push(result); @@ -433,7 +446,11 @@ impl Engine { return Err(Error::Config(format!( "Query failed for all {} document(s): {}", failed.len(), - failed.iter().map(|f| format!("{} ({})", f.source, f.error)).collect::>().join("; ") + failed + .iter() + .map(|f| format!("{} ({})", f.source, f.error)) + .collect::>() + .join("; ") ))); } @@ -531,7 +548,10 @@ impl Engine { // ============================================================ /// Get document structure (tree) and optional reasoning index. Internal use only. 
- pub(crate) async fn get_structure(&self, doc_id: &str) -> Result<(DocumentTree, Option)> { + pub(crate) async fn get_structure( + &self, + doc_id: &str, + ) -> Result<(DocumentTree, Option)> { let workspace = self .workspace .as_ref() diff --git a/rust/src/client/indexer.rs b/rust/src/client/indexer.rs index e746020e..2764aaa7 100644 --- a/rust/src/client/indexer.rs +++ b/rust/src/client/indexer.rs @@ -447,7 +447,9 @@ impl IndexerClient { } persisted.reasoning_index = doc.reasoning_index; - persisted.meta.update_processing_stats(node_count, summary_tokens, duration_ms); + persisted + .meta + .update_processing_stats(node_count, summary_tokens, duration_ms); persisted } diff --git a/rust/src/client/retriever.rs b/rust/src/client/retriever.rs index ad0638c6..29c0e0d4 100644 --- a/rust/src/client/retriever.rs +++ b/rust/src/client/retriever.rs @@ -125,7 +125,8 @@ impl RetrieverClient { question: &str, options: &RetrieveOptions, ) -> Result { - self.query_with_reasoning_index(tree, question, options, None).await + self.query_with_reasoning_index(tree, question, options, None) + .await } /// Query a document tree with optional reasoning index for fast-path lookup. 
diff --git a/rust/src/config/mod.rs b/rust/src/config/mod.rs index 5ab66b55..af96c518 100644 --- a/rust/src/config/mod.rs +++ b/rust/src/config/mod.rs @@ -15,6 +15,5 @@ pub(crate) use loader::ConfigLoader; pub(crate) use types::{ CacheConfig, CompressionAlgorithm, ConcurrencyConfig, Config, FallbackBehavior, FallbackConfig, IndexerConfig, LlmConfig, LlmMetricsConfig, MetricsConfig, OnAllFailedBehavior, - PilotMetricsConfig, RetrievalConfig, RetrievalMetricsConfig, StrategyConfig, SufficiencyConfig, - SummaryConfig, + PilotMetricsConfig, RetrievalConfig, RetrievalMetricsConfig, SufficiencyConfig, SummaryConfig, }; diff --git a/rust/src/config/types/llm_pool.rs b/rust/src/config/types/llm_pool.rs index c17ed966..d77d1241 100644 --- a/rust/src/config/types/llm_pool.rs +++ b/rust/src/config/types/llm_pool.rs @@ -11,15 +11,15 @@ use serde::{Deserialize, Serialize}; /// Unified LLM configuration. /// /// Contains all settings for LLM operations including: -/// - Pool of clients for different purposes (summary, retrieval, pilot) +/// - Pool of clients for different purposes (index, retrieval, pilot) /// - Retry behavior /// - Throttle/rate limiting /// - Fallback strategy #[derive(Debug, Clone, Serialize, Deserialize)] pub struct LlmPoolConfig { - /// Summary client configuration. - #[serde(default)] - pub summary: LlmClientConfig, + /// Index client configuration (used during document indexing). + #[serde(default, alias = "summary")] + pub index: LlmClientConfig, /// Retrieval client configuration. #[serde(default)] @@ -33,6 +33,10 @@ pub struct LlmPoolConfig { #[serde(default)] pub api_key: Option, + /// Default API endpoint (used if not specified per-client). + #[serde(default)] + pub endpoint: Option, + /// Retry configuration. 
#[serde(default)] pub retry: RetryConfig, @@ -57,13 +61,14 @@ fn default_pilot_config() -> LlmClientConfig { impl Default for LlmPoolConfig { fn default() -> Self { Self { - summary: LlmClientConfig::default(), + index: LlmClientConfig::default(), retrieval: LlmClientConfig { max_tokens: 100, ..Default::default() }, pilot: default_pilot_config(), api_key: None, + endpoint: None, retry: RetryConfig::default(), throttle: ThrottleConfig::default(), fallback: FallbackConfig::default(), @@ -87,8 +92,8 @@ impl LlmPoolConfig { pub fn get_api_key_for(&self, client_key: Option<&str>) -> Option { // First check client-specific key if let Some(key) = client_key { - if let Some(ref k) = self.summary.api_key { - if self.summary.model == key { + if let Some(ref k) = self.index.api_key { + if self.index.model == key { return Some(k.clone()); } } @@ -106,6 +111,20 @@ impl LlmPoolConfig { // Fall back to default self.api_key.clone() } + + /// Resolve API key: client-specific first, then default. + pub fn resolved_api_key(&self, client: &LlmClientConfig) -> Option { + client.api_key.clone().or_else(|| self.api_key.clone()) + } + + /// Resolve endpoint: client-specific first, then default. + pub fn resolved_endpoint(&self, client: &LlmClientConfig) -> String { + if !client.endpoint.is_empty() { + client.endpoint.clone() + } else { + self.endpoint.clone().unwrap_or_default() + } + } } /// Individual LLM client configuration. 
@@ -410,7 +429,7 @@ mod tests { #[test] fn test_llm_pool_config_defaults() { let config = LlmPoolConfig::default(); - assert!(config.summary.model.is_empty()); + assert!(config.index.model.is_empty()); assert!(config.retrieval.model.is_empty()); assert!(config.pilot.model.is_empty()); assert_eq!(config.retry.max_attempts, 3); diff --git a/rust/src/config/types/mod.rs b/rust/src/config/types/mod.rs index e355a178..32634a60 100644 --- a/rust/src/config/types/mod.rs +++ b/rust/src/config/types/mod.rs @@ -160,7 +160,7 @@ impl Config { )); } - // Validate summary + // Validate summary (index) if self.summary.max_tokens == 0 { errors.push(ValidationError::error( "summary.max_tokens", @@ -360,14 +360,14 @@ mod tests { assert!(config.retrieval.model.is_empty()); assert_eq!(config.concurrency.max_concurrent_requests, 10); // New fields - assert!(config.llm.summary.model.is_empty()); + assert!(config.llm.index.model.is_empty()); assert!(config.metrics.enabled); } #[test] fn test_llm_pool_config_defaults() { let config = LlmPoolConfig::default(); - assert!(config.summary.model.is_empty()); + assert!(config.index.model.is_empty()); assert!(config.retrieval.model.is_empty()); assert_eq!(config.retry.max_attempts, 3); assert_eq!(config.throttle.max_concurrent_requests, 10); diff --git a/rust/src/config/types/retrieval.rs b/rust/src/config/types/retrieval.rs index bfa2d756..fc131bc6 100644 --- a/rust/src/config/types/retrieval.rs +++ b/rust/src/config/types/retrieval.rs @@ -135,6 +135,12 @@ pub struct SearchConfig { /// Minimum score to include a path. #[serde(default = "default_min_score")] pub min_score: f32, + + /// Fallback chain: algorithms tried in order until min_score is met. + /// Options: "beam", "mcts", "pure_pilot". 
+ /// Default: ["beam", "mcts", "pure_pilot"] + #[serde(default = "default_fallback_chain")] + pub fallback_chain: Vec, } fn default_search_top_k() -> usize { @@ -152,6 +158,9 @@ fn default_max_iterations() -> usize { fn default_min_score() -> f32 { 0.1 } +fn default_fallback_chain() -> Vec { + vec!["beam".into(), "mcts".into(), "pure_pilot".into()] +} impl Default for SearchConfig { fn default() -> Self { @@ -160,6 +169,7 @@ impl Default for SearchConfig { beam_width: default_beam_width(), max_iterations: default_max_iterations(), min_score: default_min_score(), + fallback_chain: default_fallback_chain(), } } } diff --git a/rust/src/index/parse/pdf/parser.rs b/rust/src/index/parse/pdf/parser.rs index 7702872b..a3327cc0 100644 --- a/rust/src/index/parse/pdf/parser.rs +++ b/rust/src/index/parse/pdf/parser.rs @@ -296,7 +296,9 @@ impl PdfParser { TocProcessor::with_llm_client(client.clone()) } None => { - info!("PdfParser: creating TocProcessor without LLM client (no key configured)"); + info!( + "PdfParser: creating TocProcessor without LLM client (no key configured)" + ); TocProcessor::new() } }; diff --git a/rust/src/index/parse/toc/assigner.rs b/rust/src/index/parse/toc/assigner.rs index 52d50403..b7399dce 100644 --- a/rust/src/index/parse/toc/assigner.rs +++ b/rust/src/index/parse/toc/assigner.rs @@ -3,8 +3,8 @@ //! Page assigner - assigns physical page numbers to TOC entries. 
-use std::collections::HashMap; use futures::stream::{self, StreamExt}; +use std::collections::HashMap; use tracing::{debug, info}; use crate::config::LlmConfig; @@ -175,10 +175,7 @@ impl PageAssigner { }) .collect(); - let verified_offsets: Vec<_> = stream::iter(futures) - .buffer_unordered(5) - .collect() - .await; + let verified_offsets: Vec<_> = stream::iter(futures).buffer_unordered(5).collect().await; // Calculate the mode (most common offset) let successful: Vec<_> = verified_offsets @@ -277,21 +274,21 @@ Reply in JSON format: let total = entries.len(); // Launch entry searches with bounded concurrency to avoid rate limiting - let futures: Vec<_> = entries.iter().map(|entry| { - let title = entry.title.clone(); - let client = client.clone(); - let pages = pages_owned.clone(); - - async move { - let groups = Self::group_pages_owned(&pages, 5); - Self::locate_title_in_groups_static(&client, &title, &groups).await - } - }).collect(); + let futures: Vec<_> = entries + .iter() + .map(|entry| { + let title = entry.title.clone(); + let client = client.clone(); + let pages = pages_owned.clone(); - let results: Vec<_> = stream::iter(futures) - .buffer_unordered(5) - .collect() - .await; + async move { + let groups = Self::group_pages_owned(&pages, 5); + Self::locate_title_in_groups_static(&client, &title, &groups).await + } + }) + .collect(); + + let results: Vec<_> = stream::iter(futures).buffer_unordered(5).collect().await; info!("Assigned pages for {}/{} entries", results.len(), total); diff --git a/rust/src/index/parse/toc/detector.rs b/rust/src/index/parse/toc/detector.rs index 032a18af..050c6b2a 100644 --- a/rust/src/index/parse/toc/detector.rs +++ b/rust/src/index/parse/toc/detector.rs @@ -79,11 +79,7 @@ impl TocDetector { let use_llm = config.use_llm_fallback; Self { config, - llm_client: if use_llm { - Some(client) - } else { - None - }, + llm_client: if use_llm { Some(client) } else { None }, patterns: Self::build_patterns(), } } @@ -350,20 +346,4 @@ mod 
tests { assert!(result.found); } - - #[test] - #[ignore = "requires OPENAI_API_KEY environment variable"] - fn test_no_toc() { - let detector = TocDetector::with_defaults(); - - let pages = vec![ - make_page(1, "This is a simple document."), - make_page(2, "It has no table of contents."), - ]; - - let rt = tokio::runtime::Runtime::new().unwrap(); - let result = rt.block_on(detector.detect(&pages)).unwrap(); - - assert!(!result.found); - } } diff --git a/rust/src/index/parse/toc/processor.rs b/rust/src/index/parse/toc/processor.rs index 8e5f59b0..e53b6346 100644 --- a/rust/src/index/parse/toc/processor.rs +++ b/rust/src/index/parse/toc/processor.rs @@ -180,16 +180,10 @@ impl TocProcessor { info!("No TOC found in document"); ProcessingMode::NoToc } else if detection.has_page_numbers { - info!( - "TOC found on pages {:?}, has page numbers", - detection.pages - ); + info!("TOC found on pages {:?}, has page numbers", detection.pages); ProcessingMode::TocWithPageNumbers } else { - info!( - "TOC found on pages {:?}, no page numbers", - detection.pages - ); + info!("TOC found on pages {:?}, no page numbers", detection.pages); ProcessingMode::TocWithoutPageNumbers }; @@ -222,7 +216,8 @@ impl TocProcessor { self.process_toc_with_page_numbers(detection, pages).await } ProcessingMode::TocWithoutPageNumbers => { - self.process_toc_without_page_numbers(detection, pages).await + self.process_toc_without_page_numbers(detection, pages) + .await } ProcessingMode::NoToc => { // NoToc always succeeds (produces some structure) @@ -234,9 +229,7 @@ impl TocProcessor { Ok(entries) if !entries.is_empty() => { // Verify the entries let mut mutable_entries = entries; - let report = self - .verify_and_repair(&mut mutable_entries, pages) - .await?; + let report = self.verify_and_repair(&mut mutable_entries, pages).await?; if report.accuracy >= self.config.accuracy_threshold { info!( @@ -437,8 +430,7 @@ impl TocProcessor { .filter(|(i, entry)| { let span = entry_page_span(entry, next_pages[*i], 
page_count); let tokens = entry_token_count(entry, pages); - span > self.config.max_pages_per_entry - && tokens > self.config.max_tokens_per_entry + span > self.config.max_pages_per_entry && tokens > self.config.max_tokens_per_entry }) .map(|(i, entry)| { let start = entry.physical_page.unwrap_or(1); @@ -541,7 +533,11 @@ impl Default for TocProcessor { /// Calculate how many pages an entry spans. /// /// From its physical_page to the next entry's physical_page (or document end). -fn entry_page_span(entry: &TocEntry, next_physical_page: Option, total_pages: usize) -> usize { +fn entry_page_span( + entry: &TocEntry, + next_physical_page: Option, + total_pages: usize, +) -> usize { let start = entry.physical_page.unwrap_or(1); let end = next_physical_page.unwrap_or(total_pages); end.saturating_sub(start) diff --git a/rust/src/index/parse/toc/repairer.rs b/rust/src/index/parse/toc/repairer.rs index 13c19877..3c7666fe 100644 --- a/rust/src/index/parse/toc/repairer.rs +++ b/rust/src/index/parse/toc/repairer.rs @@ -94,23 +94,15 @@ impl IndexRepairer { let start = expected_page.saturating_sub(search_range).max(1); let end = (expected_page + search_range).min(pages.len()); - let result = Self::find_correct_page_static( - &client, - &title, - &pages, - start..=end, - ) - .await; + let result = + Self::find_correct_page_static(&client, &title, &pages, start..=end).await; (title, expected_page, result) } }) .collect(); - let results: Vec<_> = stream::iter(tasks) - .buffer_unordered(5) - .collect() - .await; + let results: Vec<_> = stream::iter(tasks).buffer_unordered(5).collect().await; // Apply repairs let mut repaired_count = 0; diff --git a/rust/src/index/parse/toc/structure_extractor.rs b/rust/src/index/parse/toc/structure_extractor.rs index be2486d9..36925644 100644 --- a/rust/src/index/parse/toc/structure_extractor.rs +++ b/rust/src/index/parse/toc/structure_extractor.rs @@ -122,10 +122,8 @@ impl StructureExtractor { let initial = initial_entries_ref.to_vec(); async move 
{ - let result = Self::generate_continuation_with_client( - &client, &group, &initial, - ) - .await; + let result = + Self::generate_continuation_with_client(&client, &group, &initial).await; (group.start_page, group.end_page, result) } }) @@ -150,10 +148,7 @@ impl StructureExtractor { all_entries.extend(entries); } Err(e) => { - warn!( - "Continuation group (pages {}-{}) failed: {}", - start, end, e - ); + warn!("Continuation group (pages {}-{}) failed: {}", start, end, e); } } } @@ -165,8 +160,7 @@ impl StructureExtractor { .cmp(&b.physical_page.unwrap_or(0)) }); all_entries.dedup_by(|a, b| { - a.title.trim() == b.title.trim() - && a.physical_page == b.physical_page + a.title.trim() == b.title.trim() && a.physical_page == b.physical_page }); Ok(Self::finalize_entries(all_entries, page_count)) @@ -177,10 +171,7 @@ impl StructureExtractor { for entry in &mut entries { if let Some(p) = entry.physical_page { if p > page_count { - warn!( - "Truncating out-of-range page {} for '{}'", - p, entry.title - ); + warn!("Truncating out-of-range page {} for '{}'", p, entry.title); entry.physical_page = Some(page_count); } } @@ -461,21 +452,26 @@ mod tests { // Create pages with enough text to span multiple groups let pages: Vec = (1..=10) .map(|i| { - let text = format!("Page {} content. This is a longer text to use more tokens. ", i).repeat(10); + let text = format!( + "Page {} content. This is a longer text to use more tokens. 
", + i + ) + .repeat(10); PdfPage::new(i, text) }) .collect(); let groups = extractor.group_pages(&pages); - assert!(groups.len() > 1, "Expected multiple groups, got {}", groups.len()); + assert!( + groups.len() > 1, + "Expected multiple groups, got {}", + groups.len() + ); } #[test] fn test_format_group_text() { - let pages = vec![ - PdfPage::new(1, "Hello"), - PdfPage::new(2, "World"), - ]; + let pages = vec![PdfPage::new(1, "Hello"), PdfPage::new(2, "World")]; let text = format_group_text(&pages); assert!(text.contains("")); assert!(text.contains("")); diff --git a/rust/src/index/parse/toc/verifier.rs b/rust/src/index/parse/toc/verifier.rs index fd944386..3eda474c 100644 --- a/rust/src/index/parse/toc/verifier.rs +++ b/rust/src/index/parse/toc/verifier.rs @@ -79,34 +79,29 @@ impl IndexVerifier { // Launch verification checks with bounded concurrency let client = self.client.clone(); - let futures: Vec<_> = sample.iter().map(|(index, entry)| { - let index = *index; - let title = entry.title.clone(); - let physical_page = entry.physical_page; - let client = client.clone(); - let pages = pages.to_vec(); - - async move { - match physical_page { - Some(page) => { - let result = - Self::verify_entry_with_client(&client, &title, page, &pages).await; - (index, title, page, result) + let futures: Vec<_> = sample + .iter() + .map(|(index, entry)| { + let index = *index; + let title = entry.title.clone(); + let physical_page = entry.physical_page; + let client = client.clone(); + let pages = pages.to_vec(); + + async move { + match physical_page { + Some(page) => { + let result = + Self::verify_entry_with_client(&client, &title, page, &pages).await; + (index, title, page, result) + } + None => (index, title, 0, Ok(Err(ErrorType::PageOutOfRange))), } - None => ( - index, - title, - 0, - Ok(Err(ErrorType::PageOutOfRange)), - ), } - } - }).collect(); + }) + .collect(); - let results: Vec<_> = stream::iter(futures) - .buffer_unordered(5) - .collect() - .await; + let results: 
Vec<_> = stream::iter(futures).buffer_unordered(5).collect().await; // Aggregate results let total = results.len(); @@ -121,7 +116,12 @@ impl IndexVerifier { } Err(e) => { debug!("Verification LLM call failed: {}", e); - errors.push(VerificationError::new(index, title, page, ErrorType::TitleNotFound)); + errors.push(VerificationError::new( + index, + title, + page, + ErrorType::TitleNotFound, + )); } } } diff --git a/rust/src/index/stages/enhance.rs b/rust/src/index/stages/enhance.rs index a79b5fb3..d33e0acc 100644 --- a/rust/src/index/stages/enhance.rs +++ b/rust/src/index/stages/enhance.rs @@ -295,9 +295,8 @@ impl IndexStage for EnhanceStage { if summary.is_empty() { failed += 1; } else { - ctx.metrics.add_tokens_generated( - crate::utils::estimate_tokens(&summary), - ); + ctx.metrics + .add_tokens_generated(crate::utils::estimate_tokens(&summary)); tree.set_summary(node_id, &summary); generated += 1; ctx.metrics.increment_summaries(); diff --git a/rust/src/index/stages/optimize.rs b/rust/src/index/stages/optimize.rs index 6b21688f..209de7e2 100644 --- a/rust/src/index/stages/optimize.rs +++ b/rust/src/index/stages/optimize.rs @@ -79,11 +79,12 @@ impl OptimizeStage { curr.content.push_str("\n\n"); } // Prefix with heading to preserve boundary - curr.content - .push_str(&format!("## {}\n{}", next_node.title, next_node.content)); + curr.content.push_str(&format!( + "## {}\n{}", + next_node.title, next_node.content + )); } - curr.token_count = - Some(curr.token_count.unwrap_or(0) + next_tokens); + curr.token_count = Some(curr.token_count.unwrap_or(0) + next_tokens); } } diff --git a/rust/src/index/stages/parse.rs b/rust/src/index/stages/parse.rs index 6c8166b6..2ca30a14 100644 --- a/rust/src/index/stages/parse.rs +++ b/rust/src/index/stages/parse.rs @@ -102,7 +102,8 @@ impl IndexStage for ParseStage { ctx.name = name.clone(); // Parse content directly - crate::index::parse::parse_content(content, *format, self.llm_client.clone()).await? 
+ crate::index::parse::parse_content(content, *format, self.llm_client.clone()) + .await? } IndexInput::Bytes { data, name, format } => { // Set name diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 689d331f..ea1d79d6 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -67,6 +67,9 @@ pub use client::{ QueryResultItem, }; +// Retrieval types +pub use retrieval::StrategyPreference; + // Error types pub use error::{Error, Result}; diff --git a/rust/src/llm/config.rs b/rust/src/llm/config.rs index e3c584df..882ca828 100644 --- a/rust/src/llm/config.rs +++ b/rust/src/llm/config.rs @@ -200,20 +200,20 @@ impl LlmConfig { /// Pool of LLM configurations for different purposes. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct LlmConfigs { - /// Configuration for summarization tasks. - #[serde(default = "default_summary_config")] - pub summary: LlmConfig, + /// Configuration for indexing tasks (document summarization, etc.). + #[serde(default = "default_index_config", alias = "summary")] + pub index: LlmConfig, /// Configuration for retrieval/navigation tasks. #[serde(default = "default_retrieval_config")] pub retrieval: LlmConfig, - /// Configuration for TOC processing tasks. - #[serde(default = "default_toc_config")] - pub toc: LlmConfig, + /// Configuration for Pilot navigation tasks. 
+ #[serde(default = "default_pilot_config")] + pub pilot: LlmConfig, } -fn default_summary_config() -> LlmConfig { +fn default_index_config() -> LlmConfig { LlmConfig { max_tokens: 200, temperature: 0.0, @@ -229,9 +229,9 @@ fn default_retrieval_config() -> LlmConfig { } } -fn default_toc_config() -> LlmConfig { +fn default_pilot_config() -> LlmConfig { LlmConfig { - max_tokens: 2000, + max_tokens: 300, temperature: 0.0, ..LlmConfig::default() } @@ -240,9 +240,9 @@ fn default_toc_config() -> LlmConfig { impl Default for LlmConfigs { fn default() -> Self { Self { - summary: default_summary_config(), + index: default_index_config(), retrieval: default_retrieval_config(), - toc: default_toc_config(), + pilot: default_pilot_config(), } } } diff --git a/rust/src/llm/executor.rs b/rust/src/llm/executor.rs index 620f111a..d13e24fc 100644 --- a/rust/src/llm/executor.rs +++ b/rust/src/llm/executor.rs @@ -338,7 +338,8 @@ impl LlmExecutor { let api_key = self.config.api_key.clone().ok_or_else(|| { LlmError::Config( - "No API key configured. Call .with_key(\"sk-...\") when building the engine.".to_string(), + "No API key configured. Call .with_key(\"sk-...\") when building the engine." + .to_string(), ) })?; diff --git a/rust/src/llm/mod.rs b/rust/src/llm/mod.rs index c19b60e1..6d23e3dd 100644 --- a/rust/src/llm/mod.rs +++ b/rust/src/llm/mod.rs @@ -4,9 +4,9 @@ //! Unified LLM client module. //! //! This module provides a unified interface for all LLM operations across the codebase: -//! - **Summarization** — Generating document summaries +//! - **Index** — Document indexing and summarization //! - **Retrieval** — Document tree navigation -//! - **TOC Processing** — Table of contents extraction +//! - **Pilot** — Navigation guidance //! //! # Features //! @@ -22,7 +22,7 @@ //! │ LlmPool │ //! │ │ //! │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ -//! │ │ summary │ │ retrieval │ │ toc │ │ +//! │ │ index │ │ retrieval │ │ pilot │ │ //! 
│ │ LlmClient │ │ LlmClient │ │ LlmClient │ │ //! │ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │ //! │ │ │ │ │ @@ -45,8 +45,8 @@ //! // Create a pool with default configurations //! let pool = LlmPool::from_defaults(); //! -//! // Use summary client -//! let summary = pool.summary().complete( +//! // Use index client +//! let summary = pool.index().complete( //! "You summarize text concisely.", //! "Long text to summarize..." //! ).await?; diff --git a/rust/src/llm/pool.rs b/rust/src/llm/pool.rs index 375731dd..51b07ff3 100644 --- a/rust/src/llm/pool.rs +++ b/rust/src/llm/pool.rs @@ -13,9 +13,9 @@ use crate::throttle::ConcurrencyController; /// /// This provides a centralized way to access LLM clients /// configured for specific tasks: -/// - **Summary** — Document summarization (fast, cheap model) +/// - **Index** — Document indexing/summarization (fast, cheap model) /// - **Retrieval** — Document navigation (capable model) -/// - **TOC** — Table of contents processing (fast, cheap model) +/// - **Pilot** — Navigation guidance (fast model) /// /// # Example /// @@ -26,8 +26,8 @@ use crate::throttle::ConcurrencyController; /// # async fn main() -> vectorless::llm::LlmResult<()> { /// let pool = LlmPool::from_defaults(); /// -/// // Use summary client for summarization -/// let summary = pool.summary().complete( +/// // Use index client for summarization +/// let summary = pool.index().complete( /// "You summarize text concisely.", /// "Long text to summarize..." /// ).await?; @@ -43,9 +43,9 @@ use crate::throttle::ConcurrencyController; /// ``` #[derive(Debug, Clone)] pub struct LlmPool { - summary: Arc, + index: Arc, retrieval: Arc, - toc: Arc, + pilot: Arc, concurrency: Option>, } @@ -53,9 +53,9 @@ impl LlmPool { /// Create a new LLM pool from configurations. 
pub fn new(configs: LlmConfigs) -> Self { Self { - summary: Arc::new(LlmClient::new(configs.summary)), + index: Arc::new(LlmClient::new(configs.index)), retrieval: Arc::new(LlmClient::new(configs.retrieval)), - toc: Arc::new(LlmClient::new(configs.toc)), + pilot: Arc::new(LlmClient::new(configs.pilot)), concurrency: None, } } @@ -92,14 +92,14 @@ impl LlmPool { pub fn with_concurrency(mut self, controller: ConcurrencyController) -> Self { let arc = Arc::new(controller); self.concurrency = Some(arc.clone()); - self.summary = Arc::new( - LlmClient::new(self.summary.config().clone()).with_shared_concurrency(arc.clone()), + self.index = Arc::new( + LlmClient::new(self.index.config().clone()).with_shared_concurrency(arc.clone()), ); self.retrieval = Arc::new( LlmClient::new(self.retrieval.config().clone()).with_shared_concurrency(arc.clone()), ); - self.toc = Arc::new( - LlmClient::new(self.toc.config().clone()).with_shared_concurrency(arc.clone()), + self.pilot = Arc::new( + LlmClient::new(self.pilot.config().clone()).with_shared_concurrency(arc.clone()), ); self } @@ -107,16 +107,15 @@ impl LlmPool { /// Add concurrency control from an existing Arc. pub fn with_shared_concurrency(mut self, controller: Arc) -> Self { self.concurrency = Some(controller.clone()); - self.summary = Arc::new( - LlmClient::new(self.summary.config().clone()) - .with_shared_concurrency(controller.clone()), + self.index = Arc::new( + LlmClient::new(self.index.config().clone()).with_shared_concurrency(controller.clone()), ); self.retrieval = Arc::new( LlmClient::new(self.retrieval.config().clone()) .with_shared_concurrency(controller.clone()), ); - self.toc = Arc::new( - LlmClient::new(self.toc.config().clone()).with_shared_concurrency(controller.clone()), + self.pilot = Arc::new( + LlmClient::new(self.pilot.config().clone()).with_shared_concurrency(controller.clone()), ); self } @@ -126,12 +125,12 @@ impl LlmPool { self.concurrency.as_deref() } - /// Get the summary client. 
+ /// Get the index client. /// - /// Used for generating summaries of document sections. + /// Used for document indexing and summarization. /// Typically uses a fast, cost-effective model. - pub fn summary(&self) -> &LlmClient { - &self.summary + pub fn index(&self) -> &LlmClient { + &self.index } /// Get the retrieval client. @@ -142,28 +141,28 @@ impl LlmPool { &self.retrieval } - /// Get the TOC client. + /// Get the pilot client. /// - /// Used for TOC detection, parsing, and page assignment. - /// Typically uses a fast, cost-effective model. - pub fn toc(&self) -> &LlmClient { - &self.toc + /// Used for intelligent navigation guidance. + /// Typically uses a fast model for quick decisions. + pub fn pilot(&self) -> &LlmClient { + &self.pilot } /// Get a client for a specific purpose by name. /// /// # Arguments /// - /// * `purpose` - One of: "summary", "summarize", "retrieval", "retrieve", "navigate", "toc" + /// * `purpose` - One of: "index", "summary", "retrieval", "retrieve", "navigate", "pilot" /// /// # Returns /// /// Returns `None` if the purpose is not recognized. 
pub fn get(&self, purpose: &str) -> Option<&LlmClient> { match purpose { - "summary" | "summarize" => Some(&self.summary), + "index" | "summary" | "summarize" => Some(&self.index), "retrieval" | "retrieve" | "navigate" => Some(&self.retrieval), - "toc" => Some(&self.toc), + "pilot" => Some(&self.pilot), _ => None, } } @@ -175,9 +174,9 @@ impl LlmPool { let config = super::config::LlmConfig::new(model); let client = Arc::new(LlmClient::new(config)); Self { - summary: client.clone(), + index: client.clone(), retrieval: client.clone(), - toc: client, + pilot: client, concurrency: None, } } @@ -198,9 +197,9 @@ mod tests { let pool = LlmPool::from_defaults(); // Should have all clients - assert!(pool.get("summary").is_some()); + assert!(pool.get("index").is_some()); assert!(pool.get("retrieval").is_some()); - assert!(pool.get("toc").is_some()); + assert!(pool.get("pilot").is_some()); assert!(pool.get("unknown").is_none()); } @@ -209,6 +208,7 @@ mod tests { let pool = LlmPool::from_defaults(); // Test aliases + assert!(pool.get("summary").is_some()); assert!(pool.get("summarize").is_some()); assert!(pool.get("retrieve").is_some()); assert!(pool.get("navigate").is_some()); @@ -219,9 +219,9 @@ mod tests { let pool = LlmPool::single_model("gpt-4o-mini"); // All clients should use the same model - assert_eq!(pool.summary().config().model, "gpt-4o-mini"); + assert_eq!(pool.index().config().model, "gpt-4o-mini"); assert_eq!(pool.retrieval().config().model, "gpt-4o-mini"); - assert_eq!(pool.toc().config().model, "gpt-4o-mini"); + assert_eq!(pool.pilot().config().model, "gpt-4o-mini"); } #[test] @@ -233,8 +233,8 @@ mod tests { // All clients should have concurrency enabled assert!(pool.concurrency().is_some()); - assert!(pool.summary().concurrency().is_some()); + assert!(pool.index().concurrency().is_some()); assert!(pool.retrieval().concurrency().is_some()); - assert!(pool.toc().concurrency().is_some()); + assert!(pool.pilot().concurrency().is_some()); } } diff --git 
a/rust/src/retrieval/complexity/detector.rs b/rust/src/retrieval/complexity/detector.rs index 5079040d..602da79c 100644 --- a/rust/src/retrieval/complexity/detector.rs +++ b/rust/src/retrieval/complexity/detector.rs @@ -2,125 +2,116 @@ // SPDX-License-Identifier: Apache-2.0 //! Query complexity detector implementation. +//! +//! Uses Pilot's LLM client for accurate complexity classification when available. +//! Falls back to heuristic rules (keyword + word count) when no LLM client. use std::collections::HashSet; use super::QueryComplexity; -/// Configuration for complexity detection. -#[derive(Debug, Clone)] -pub struct ComplexityConfig { - /// Maximum words for simple query. - pub simple_max_words: usize, - /// Maximum words for medium query. - pub medium_max_words: usize, - /// Complexity indicators (words that suggest complex queries). - pub complex_indicators: Vec, - /// Simple query indicators. - pub simple_indicators: Vec, -} - -impl Default for ComplexityConfig { - fn default() -> Self { - Self { - simple_max_words: 5, - medium_max_words: 15, - complex_indicators: vec![ - "compare".to_string(), - "contrast".to_string(), - "analyze".to_string(), - "evaluate".to_string(), - "synthesize".to_string(), - "explain why".to_string(), - "how does".to_string(), - "what are the implications".to_string(), - "relationship between".to_string(), - "cause and effect".to_string(), - ], - simple_indicators: vec![ - "what is".to_string(), - "define".to_string(), - "list".to_string(), - "who".to_string(), - "when".to_string(), - "where".to_string(), - ], - } - } -} - /// Query complexity detector. /// -/// Analyzes queries to determine their complexity level, -/// which influences strategy selection. +/// Uses LLM for classification when available; falls back to heuristic rules. pub struct ComplexityDetector { - config: ComplexityConfig, + /// Optional LLM client for LLM-based detection. + llm_client: Option, } impl ComplexityDetector { - /// Create a new complexity detector. 
+ /// Create a new complexity detector (heuristic only). pub fn new() -> Self { + Self { llm_client: None } + } + + /// Create with LLM client for accurate detection. + pub fn with_llm_client(client: crate::llm::LlmClient) -> Self { Self { - config: ComplexityConfig::default(), + llm_client: Some(client), } } - /// Create with custom configuration. - pub fn with_config(config: ComplexityConfig) -> Self { - Self { config } + /// Detect the complexity of a query. + /// + /// Uses LLM when available; falls back to heuristic rules. + pub async fn detect(&self, query: &str) -> QueryComplexity { + if let Some(ref client) = self.llm_client { + if let Some(complexity) = crate::retrieval::pilot::detect_with_llm(client, query).await + { + return complexity; + } + tracing::warn!("LLM complexity detection failed, falling back to heuristic"); + } + self.detect_heuristic(query) } - /// Detect the complexity of a query. - pub fn detect(&self, query: &str) -> QueryComplexity { + /// Heuristic-based fallback: keyword matching + word count. 
+ fn detect_heuristic(&self, query: &str) -> QueryComplexity { let query_lower = query.to_lowercase(); - let word_count = query.split_whitespace().count(); + let word_count = estimate_word_count(query); - // Check for complex indicators - for indicator in &self.config.complex_indicators { + // Complex indicators (English + Chinese) + let complex_indicators = [ + "compare", + "contrast", + "analyze", + "evaluate", + "synthesize", + "explain why", + "how does", + "relationship between", + "cause and effect", + "对比", + "分析", + "评估", + "综合", + "为什么", + "原因", + "关系", + "影响", + "区别", + "异同", + ]; + + for indicator in &complex_indicators { if query_lower.contains(indicator) { return QueryComplexity::Complex; } } - // Check for simple indicators - for indicator in &self.config.simple_indicators { - if query_lower.contains(indicator) { - // Simple indicator found, but check word count - if word_count <= self.config.medium_max_words { - return QueryComplexity::Simple; - } + // Simple indicators + let simple_indicators = [ + "what is", + "define", + "list", + "who", + "when", + "where", + "什么是", + "定义", + "列表", + "谁", + "何时", + "哪里", + "在哪", + ]; + + for indicator in &simple_indicators { + if query_lower.contains(indicator) && word_count <= 15 { + return QueryComplexity::Simple; } } - // Check for multiple questions - let question_marks = query.matches('?').count(); + // Multiple questions + let question_marks = query.matches('?').count() + query.matches('?').count(); if question_marks > 1 { return QueryComplexity::Complex; } - // Check for conjunctions suggesting multiple parts - let conjunctions = ["and", "or", "but", "however", "although"]; - let conjunction_count = conjunctions - .iter() - .filter(|c| query_lower.split_whitespace().any(|w| w == **c)) - .count(); - - if conjunction_count >= 2 { - return QueryComplexity::Complex; - } - - // Check for nested concepts - let depth_indicators = ["in the context of", "with respect to", "regarding", "about"]; - for indicator in 
depth_indicators { - if query_lower.contains(indicator) { - return QueryComplexity::Medium; - } - } - - // Word count based classification - if word_count <= self.config.simple_max_words { + // Word count classification + if word_count <= 5 { QueryComplexity::Simple - } else if word_count <= self.config.medium_max_words { + } else if word_count <= 15 { QueryComplexity::Medium } else { QueryComplexity::Complex @@ -128,17 +119,16 @@ impl ComplexityDetector { } /// Get complexity score (0.0 - 1.0). - pub fn complexity_score(&self, query: &str) -> f32 { - match self.detect(query) { + pub fn complexity_score(&self, complexity: QueryComplexity) -> f32 { + match complexity { QueryComplexity::Simple => 0.2, QueryComplexity::Medium => 0.5, QueryComplexity::Complex => 0.8, } } - /// Analyze query features. + /// Analyze query features (heuristic only, no LLM call). pub fn analyze(&self, query: &str) -> QueryAnalysis { - let query_lower = query.to_lowercase(); let words: Vec<&str> = query.split_whitespace().collect(); let unique_words: HashSet<&str> = words.iter().copied().collect(); @@ -149,10 +139,10 @@ impl ComplexityDetector { } else { unique_words.len() as f32 / words.len() as f32 }, - has_question_mark: query.contains('?'), - question_count: query.matches('?').count(), - complexity: self.detect(query), - complexity_score: self.complexity_score(query), + has_question_mark: query.contains('?') || query.contains('?'), + question_count: query.matches('?').count() + query.matches('?').count(), + complexity: self.detect_heuristic(query), + complexity_score: self.complexity_score(self.detect_heuristic(query)), } } } @@ -163,6 +153,52 @@ impl Default for ComplexityDetector { } } +/// Estimate word count, handling both CJK and Latin text. 
+fn estimate_word_count(text: &str) -> usize { + let mut count = 0usize; + let mut in_latin_word = false; + + for ch in text.chars() { + if ch.is_whitespace() { + if in_latin_word { + count += 1; + in_latin_word = false; + } + } else if ch.is_ascii_alphanumeric() { + in_latin_word = true; + } else if is_cjk_char(ch) { + if in_latin_word { + count += 1; + in_latin_word = false; + } + count += 1; + } else { + if in_latin_word { + count += 1; + in_latin_word = false; + } + } + } + if in_latin_word { + count += 1; + } + count +} + +/// Check if a character is CJK (Chinese/Japanese/Korean). +fn is_cjk_char(ch: char) -> bool { + let cp = ch as u32; + (0x4E00..=0x9FFF).contains(&cp) + || (0x3400..=0x4DBF).contains(&cp) + || (0x20000..=0x2A6DF).contains(&cp) + || (0x2A700..=0x2B73F).contains(&cp) + || (0xF900..=0xFAFF).contains(&cp) + || (0x2F800..=0x2FA1F).contains(&cp) + || (0x3000..=0x303F).contains(&cp) + || (0x3040..=0x309F).contains(&cp) + || (0x30A0..=0x30FF).contains(&cp) +} + /// Analysis result for a query. 
#[derive(Debug, Clone)] pub struct QueryAnalysis { @@ -188,9 +224,18 @@ mod tests { fn test_simple_queries() { let detector = ComplexityDetector::new(); - assert_eq!(detector.detect("What is Rust?"), QueryComplexity::Simple); - assert_eq!(detector.detect("Define async"), QueryComplexity::Simple); - assert_eq!(detector.detect("List features"), QueryComplexity::Simple); + assert_eq!( + detector.detect_heuristic("What is Rust?"), + QueryComplexity::Simple + ); + assert_eq!( + detector.detect_heuristic("Define async"), + QueryComplexity::Simple + ); + assert_eq!( + detector.detect_heuristic("什么是向量检索"), + QueryComplexity::Simple + ); } #[test] @@ -198,21 +243,22 @@ mod tests { let detector = ComplexityDetector::new(); assert_eq!( - detector.detect("Compare and contrast the different approaches to async programming"), + detector.detect_heuristic( + "Compare and contrast the different approaches to async programming" + ), QueryComplexity::Complex ); assert_eq!( - detector.detect("What is the relationship between ownership and borrowing?"), + detector.detect_heuristic("What is the relationship between ownership and borrowing?"), + QueryComplexity::Complex + ); + assert_eq!( + detector.detect_heuristic("对比A和B的区别"), + QueryComplexity::Complex + ); + assert_eq!( + detector.detect_heuristic("分析索引和检索的关系"), QueryComplexity::Complex ); - } - - #[test] - fn test_medium_queries() { - let detector = ComplexityDetector::new(); - - // Medium length without complex indicators - let medium_query = "How do I implement a simple web server with error handling?"; - assert_eq!(detector.detect(medium_query), QueryComplexity::Medium); } } diff --git a/rust/src/retrieval/content/scorer.rs b/rust/src/retrieval/content/scorer.rs index 3472a733..777006da 100644 --- a/rust/src/retrieval/content/scorer.rs +++ b/rust/src/retrieval/content/scorer.rs @@ -162,11 +162,8 @@ impl RelevanceScorer { let mut components = ScoreComponents::default(); // 1. 
Keyword score (content + title + summary combined) - components.keyword_score = self.compute_keyword_score(&format!( - "{} {}", - chunk.title, - chunk.content - )); + components.keyword_score = + self.compute_keyword_score(&format!("{} {}", chunk.title, chunk.content)); // 2. BM25 score (if enabled) if matches!( diff --git a/rust/src/retrieval/pilot/complexity.rs b/rust/src/retrieval/pilot/complexity.rs new file mode 100644 index 00000000..5d77ca5b --- /dev/null +++ b/rust/src/retrieval/pilot/complexity.rs @@ -0,0 +1,71 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! LLM-based query complexity detection. +//! +//! Uses the Pilot's LLM client to classify query complexity. +//! Falls back to heuristic rules when LLM is unavailable or fails. + +use serde::Deserialize; + +use super::super::complexity::QueryComplexity; +use crate::llm::LlmClient; + +/// LLM response schema for complexity classification. +#[derive(Debug, Deserialize)] +struct ComplexityResponse { + complexity: String, +} + +/// System prompt for complexity classification. +const SYSTEM_PROMPT: &str = include_str!("prompts/system_complexity.txt"); +/// User prompt template. +const USER_PROMPT: &str = include_str!("prompts/user_complexity.txt"); + +/// Detect query complexity using LLM. +/// +/// Returns `None` if the LLM call fails (caller should fall back to heuristic). 
+pub async fn detect_with_llm(
+    client: &LlmClient,
+    query: &str,
+) -> Option<QueryComplexity> {
+    let user = USER_PROMPT.replace("{query}", query);
+
+    let resp: ComplexityResponse = client
+        .complete_json_with_max_tokens(SYSTEM_PROMPT, &user, 80)
+        .await
+        .ok()?;
+
+    let complexity = match resp.complexity.to_lowercase().as_str() {
+        "simple" => QueryComplexity::Simple,
+        "complex" => QueryComplexity::Complex,
+        _ => QueryComplexity::Medium,
+    };
+
+    tracing::debug!(
+        "LLM complexity detection: query='{}', result={:?}",
+        query,
+        complexity
+    );
+
+    Some(complexity)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_system_prompt_not_empty() {
+        assert!(!SYSTEM_PROMPT.is_empty());
+        assert!(SYSTEM_PROMPT.contains("simple"));
+        assert!(SYSTEM_PROMPT.contains("complex"));
+    }
+
+    #[test]
+    fn test_user_prompt_template() {
+        assert!(USER_PROMPT.contains("{query}"));
+        let filled = USER_PROMPT.replace("{query}", "test query");
+        assert!(filled.contains("test query"));
+    }
+}
diff --git a/rust/src/retrieval/pilot/llm_pilot.rs b/rust/src/retrieval/pilot/llm_pilot.rs
index 408397fe..df8e3b02 100644
--- a/rust/src/retrieval/pilot/llm_pilot.rs
+++ b/rust/src/retrieval/pilot/llm_pilot.rs
@@ -82,7 +82,8 @@ pub struct LlmPilot {
     /// Shared pipeline budget — the primary budget source when set.
     /// When available, Pilot checks this before making LLM calls and
     /// records token consumption here.
-    pipeline_budget: parking_lot::RwLock<Option<Arc<crate::retrieval::pipeline::BudgetController>>>,
+    pipeline_budget:
+        parking_lot::RwLock<Option<Arc<crate::retrieval::pipeline::BudgetController>>>,
     /// Context builder.
     context_builder: ContextBuilder,
     /// Prompt builder.
@@ -223,7 +224,10 @@ impl LlmPilot {
     /// but token consumption is recorded against the pipeline budget.
     /// Call this at query time (not construction time) since the pipeline
     /// budget is created per-query.
-    pub fn set_pipeline_budget(&self, budget: Arc<crate::retrieval::pipeline::BudgetController>) {
+    pub fn set_pipeline_budget(
+        &self,
+        budget: Arc<crate::retrieval::pipeline::BudgetController>,
+    ) {
         *self.pipeline_budget.write() = Some(budget);
     }
diff --git a/rust/src/retrieval/pilot/mod.rs b/rust/src/retrieval/pilot/mod.rs
index 5af9cead..daae3737 100644
--- a/rust/src/retrieval/pilot/mod.rs
+++ b/rust/src/retrieval/pilot/mod.rs
@@ -32,6 +32,7 @@
 mod budget;
 mod builder;
+mod complexity;
 mod config;
 mod decision;
 mod fallback;
@@ -43,6 +44,7 @@
 mod parser;
 mod prompts;
 mod r#trait;
 
+pub use complexity::detect_with_llm;
 pub use config::PilotConfig;
 pub use decision::{InterventionPoint, PilotDecision};
diff --git a/rust/src/retrieval/pilot/prompts/system_complexity.txt b/rust/src/retrieval/pilot/prompts/system_complexity.txt
new file mode 100644
index 00000000..e344ae70
--- /dev/null
+++ b/rust/src/retrieval/pilot/prompts/system_complexity.txt
@@ -0,0 +1,21 @@
+You are a query complexity classifier for a document retrieval system.
+Classify the query into exactly one of: "simple", "medium", "complex".
+
+Definitions:
+- simple: direct lookup, definition, single-fact question (e.g. "what is X", "define Y")
+- medium: requires combining information from 2-3 sections (e.g. "how does X work with Y")
+- complex: requires comparison, analysis, synthesis, multi-step reasoning, or information from many parts (e.g. "compare X and Y", "analyze the impact of Z")
+
+The query may be in English, Chinese, or mixed language.
+
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown, no explanation, just the JSON object.
+
+Your response must have this EXACT structure:
+{
+  "complexity": "simple",
+  "reasoning": "brief explanation"
+}
+
+Where:
+- complexity: MUST be exactly one of: "simple", "medium", "complex"
+- reasoning: MUST be a string
diff --git a/rust/src/retrieval/pilot/prompts/user_complexity.txt b/rust/src/retrieval/pilot/prompts/user_complexity.txt
new file mode 100644
index 00000000..1abaeaa0
--- /dev/null
+++ b/rust/src/retrieval/pilot/prompts/user_complexity.txt
@@ -0,0 +1 @@
+Classify this query: {query}
diff --git a/rust/src/retrieval/pipeline/context.rs b/rust/src/retrieval/pipeline/context.rs
index 484f41ca..047182e7 100644
--- a/rust/src/retrieval/pipeline/context.rs
+++ b/rust/src/retrieval/pipeline/context.rs
@@ -23,11 +23,11 @@ use crate::retrieval::types::{
 /// Search algorithm type.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum SearchAlgorithm {
-    /// Greedy single-path search.
-    Greedy,
-    /// Beam search with multiple paths.
+    /// Pure Pilot: beam=1, Pilot picks top-1 child at each layer.
+    PurePilot,
+    /// Beam search with Pilot scoring.
     Beam,
-    /// Monte Carlo Tree Search.
+    /// MCTS with Pilot priors.
     Mcts,
 }
 
@@ -41,11 +41,22 @@ impl SearchAlgorithm {
     /// Get algorithm name.
     pub fn name(&self) -> &'static str {
         match self {
-            Self::Greedy => "greedy",
+            Self::PurePilot => "pure_pilot",
             Self::Beam => "beam",
             Self::Mcts => "mcts",
         }
     }
+
+    /// Parse algorithm from config string.
+    /// Returns None for unrecognized names.
+    pub fn from_name(name: &str) -> Option<Self> {
+        match name {
+            "pure_pilot" | "greedy" => Some(Self::PurePilot),
+            "beam" => Some(Self::Beam),
+            "mcts" => Some(Self::Mcts),
+            _ => None,
+        }
+    }
 }
 
 /// Search configuration.
@@ -239,6 +250,9 @@ pub struct PipelineContext {
     pub selected_algorithm: Option<SearchAlgorithm>,
     /// Search configuration.
     pub search_config: Option<SearchConfig>,
+    /// Ordered fallback chain for search algorithms.
+    /// When the primary algorithm's result is insufficient, try the next.
+    pub search_fallback_chain: Vec<SearchAlgorithm>,
 
     // ============ Search Stage Output ============
     /// Candidate nodes from search.
     pub candidates: Vec,
@@ -307,6 +321,11 @@ impl PipelineContext {
             selected_strategy: None,
             selected_algorithm: None,
             search_config: None,
+            search_fallback_chain: vec![
+                SearchAlgorithm::Beam,
+                SearchAlgorithm::Mcts,
+                SearchAlgorithm::PurePilot,
+            ],
             candidates: Vec::new(),
             search_paths: Vec::new(),
             reasoning_chain: ReasoningChain::new(),
diff --git a/rust/src/retrieval/pipeline/orchestrator.rs b/rust/src/retrieval/pipeline/orchestrator.rs
index c42b5cbe..63e18b4e 100644
--- a/rust/src/retrieval/pipeline/orchestrator.rs
+++ b/rust/src/retrieval/pipeline/orchestrator.rs
@@ -330,7 +330,10 @@ impl RetrievalOrchestrator {
         // Share the pipeline budget with the Pilot (unified budget)
         if let Some(ref pilot) = self.pilot {
-            if let Some(llm_pilot) = pilot.as_any().downcast_ref::<crate::retrieval::pilot::LlmPilot>() {
+            if let Some(llm_pilot) = pilot
+                .as_any()
+                .downcast_ref::<crate::retrieval::pilot::LlmPilot>()
+            {
                 llm_pilot.set_pipeline_budget(ctx.budget_controller.clone());
             }
         }
@@ -622,7 +625,10 @@ impl RetrievalOrchestrator {
         // Share the pipeline budget with the Pilot (unified budget)
         if let Some(ref pilot) = self.pilot {
-            if let Some(llm_pilot) = pilot.as_any().downcast_ref::<crate::retrieval::pilot::LlmPilot>() {
+            if let Some(llm_pilot) = pilot
+                .as_any()
+                .downcast_ref::<crate::retrieval::pilot::LlmPilot>()
+            {
                 llm_pilot.set_pipeline_budget(ctx.budget_controller.clone());
             }
         }
@@ -924,7 +930,10 @@ impl RetrievalOrchestrator {
         // Share the pipeline budget with the Pilot (unified budget)
         if let Some(ref pilot) = self.pilot {
-            if let Some(llm_pilot) = pilot.as_any().downcast_ref::<crate::retrieval::pilot::LlmPilot>() {
+            if let Some(llm_pilot) = pilot
+                .as_any()
+                .downcast_ref::<crate::retrieval::pilot::LlmPilot>()
+            {
                 llm_pilot.set_pipeline_budget(ctx.budget_controller.clone());
             }
         }
diff --git a/rust/src/retrieval/pipeline_retriever.rs b/rust/src/retrieval/pipeline_retriever.rs
index 9f135cf1..2a655182 100644
--- a/rust/src/retrieval/pipeline_retriever.rs
+++ b/rust/src/retrieval/pipeline_retriever.rs
@@ -107,8 +107,12 @@
.with_max_backtracks(self.max_backtracks) .with_max_iterations(self.max_iterations); - // Add analyze stage - orchestrator = orchestrator.stage(AnalyzeStage::new()); + // Add analyze stage (with LLM client for complexity detection) + let mut analyze_stage = AnalyzeStage::new(); + if let Some(ref client) = self.llm_client { + analyze_stage = analyze_stage.with_llm_client(client.clone()); + } + orchestrator = orchestrator.stage(analyze_stage); // Add plan stage let mut plan_stage = PlanStage::new(); diff --git a/rust/src/retrieval/search/beam.rs b/rust/src/retrieval/search/beam.rs index 73bf0cc1..a7319988 100644 --- a/rust/src/retrieval/search/beam.rs +++ b/rust/src/retrieval/search/beam.rs @@ -1,32 +1,33 @@ // Copyright (c) 2026 vectorless developers // SPDX-License-Identifier: Apache-2.0 -//! Beam search algorithm with Pilot integration. +//! Beam search algorithm with Pilot as primary scorer. //! -//! Explores multiple paths in parallel, keeping only the top-k candidates at each level. -//! When a Pilot is provided, it can intervene at fork points to provide semantic guidance. +//! Explores multiple paths in parallel, keeping only the top-k candidates +//! at each level. Pilot provides semantic guidance; NodeScorer is the +//! fallback when Pilot is unavailable. use async_trait::async_trait; use std::collections::HashSet; -use tracing::{debug, trace}; +use tracing::debug; use super::super::RetrievalContext; use super::super::types::{NavigationDecision, NavigationStep, SearchPath}; -use super::scorer::{NodeScorer, ScoringContext}; +use super::pilot_scorer::{PilotDecisionCache, score_candidates}; use super::{SearchConfig, SearchResult, SearchTree}; use crate::document::{DocumentTree, NodeId}; -use crate::retrieval::pilot::{Pilot, SearchState}; +use crate::retrieval::pilot::Pilot; -/// Beam search - explores multiple paths simultaneously. +/// Beam search — explores multiple paths simultaneously. 
/// /// Keeps top `beam_width` candidates at each level, providing /// a balance between exploration and computational cost. /// /// # Pilot Integration /// -/// When a Pilot is provided, the algorithm consults it at fork points -/// (when multiple candidates are available) to get semantic guidance -/// on which branches are most relevant to the query. +/// Pilot is the primary scorer (weight=0.7). NodeScorer supplements +/// for candidates Pilot didn't rank. Decisions are cached by +/// (query, parent_node_id) to avoid redundant LLM calls. pub struct BeamSearch { beam_width: usize, } @@ -44,72 +45,7 @@ impl BeamSearch { } } - /// Create a scorer for the given query. - fn create_scorer(&self, query: &str) -> NodeScorer { - NodeScorer::new(ScoringContext::new(query)) - } - - /// Score candidates using a query-specific scorer. - fn score_candidates_with_query( - &self, - tree: &DocumentTree, - candidates: &[NodeId], - query: &str, - ) -> Vec<(NodeId, f32)> { - let scorer = self.create_scorer(query); - scorer.score_and_sort(tree, candidates) - } - - /// Merge algorithm scores with Pilot decision. 
- /// - /// Uses weighted combination: `final = α * algo + β * pilot` - /// where α = 0.4 and β = 0.6 * confidence - fn merge_with_pilot_decision( - &self, - tree: &DocumentTree, - candidates: &[NodeId], - pilot_decision: &crate::retrieval::pilot::PilotDecision, - query: &str, - ) -> Vec<(NodeId, f32)> { - let scorer = self.create_scorer(query); - let alpha = 0.4; - let beta = 0.6 * pilot_decision.confidence; - - // Build a map from node_id to pilot score - let mut pilot_scores: std::collections::HashMap = - std::collections::HashMap::new(); - for ranked in &pilot_decision.ranked_candidates { - pilot_scores.insert(ranked.node_id, ranked.score); - } - - // Merge scores - let mut merged: Vec<(NodeId, f32)> = candidates - .iter() - .map(|&node_id| { - let algo_score = scorer.score(tree, node_id); - let pilot_score = pilot_scores.get(&node_id).copied().unwrap_or(0.0); - - // Weighted combination - let final_score = if beta > 0.0 { - (alpha * algo_score + beta * pilot_score) / (alpha + beta) - } else { - algo_score - }; - - (node_id, final_score) - }) - .collect(); - - // Sort by merged score - merged.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); - - merged - } - /// Core beam search logic parameterized by start node. - /// - /// This is the shared implementation used by both `search` (starts from root) - /// and `search_from` (starts from an arbitrary node). 
async fn search_impl( &self, tree: &DocumentTree, @@ -121,8 +57,8 @@ impl BeamSearch { let mut result = SearchResult::default(); let beam_width = config.beam_width.min(self.beam_width); let mut visited: HashSet = HashSet::new(); + let cache = PilotDecisionCache::new(); - // Mark start_node as visited so we don't go back up visited.insert(start_node); debug!( @@ -130,48 +66,27 @@ impl BeamSearch { context.query, start_node, beam_width, config.min_score ); - // Track Pilot interventions let mut pilot_interventions = 0; // Initialize with start_node's children let start_children = tree.children(start_node); debug!("Start node has {} children", start_children.len()); - // Check if Pilot wants to guide the start. - // Pass start_node so the pilot evaluates the correct children. - let initial_candidates = if let Some(p) = pilot { - debug!( - "BeamSearch: Pilot is available, name={}, guide_at_start={}", - p.name(), - p.config().guide_at_start - ); - if p.config().guide_at_start { - if let Some(guidance) = p.guide_start(tree, &context.query, start_node).await { - debug!( - "Pilot provided start guidance with confidence {}", - guidance.confidence - ); - pilot_interventions += 1; - - if guidance.has_candidates() { - self.merge_with_pilot_decision( - tree, - &start_children, - &guidance, - &context.query, - ) - } else { - self.score_candidates_with_query(tree, &start_children, &context.query) - } - } else { - self.score_candidates_with_query(tree, &start_children, &context.query) - } - } else { - self.score_candidates_with_query(tree, &start_children, &context.query) - } - } else { - self.score_candidates_with_query(tree, &start_children, &context.query) - }; + let initial_candidates = score_candidates( + tree, + &start_children, + &context.query, + pilot, + &[], + &visited, + 0.7, // Beam: Pilot weight = 0.7 + Some(&cache), + ) + .await; + + if pilot.is_some() && !start_children.is_empty() { + pilot_interventions += 1; + } let mut current_beam: Vec = initial_candidates 
.into_iter() @@ -208,46 +123,21 @@ impl BeamSearch { // Expand this path let children = tree.children(leaf_id); - // ========== Pilot Intervention Point ========== - let scored_children = if let Some(p) = pilot { - let state = SearchState::new( - tree, - &context.query, - &path.nodes, - &children, - &visited, - ); - - if p.should_intervene(&state) { - trace!( - "Pilot intervening at fork with {} candidates", - children.len() - ); - - match p.decide(&state).await { - decision => { - pilot_interventions += 1; - debug!( - "Pilot decision: confidence={}, direction={:?}", - decision.confidence, - std::mem::discriminant(&decision.direction) - ); - - self.merge_with_pilot_decision( - tree, - &children, - &decision, - &context.query, - ) - } - } - } else { - self.score_candidates_with_query(tree, &children, &context.query) - } - } else { - self.score_candidates_with_query(tree, &children, &context.query) - }; - // ============================================== + let scored_children = score_candidates( + tree, + &children, + &context.query, + pilot, + &path.nodes, + &visited, + 0.7, // Beam: Pilot weight = 0.7 + Some(&cache), + ) + .await; + + if pilot.is_some() && !children.is_empty() { + pilot_interventions += 1; + } for (child_id, child_score) in scored_children.into_iter().take(beam_width) { let new_path = path.extend(child_id, child_score); @@ -294,9 +184,19 @@ impl BeamSearch { // Fallback: if no results found, add best candidates regardless of score if result.paths.is_empty() && config.min_score > 0.0 { debug!("No results above min_score, adding best candidates as fallback"); - let all_candidates = - self.score_candidates_with_query(tree, &tree.children(start_node), &context.query); - for (node_id, score) in all_candidates.into_iter().take(config.top_k) { + let all_children = tree.children(start_node); + let fallback = score_candidates( + tree, + &all_children, + &context.query, + None, // No Pilot for fallback + &[], + &visited, + 0.7, + None, + ) + .await; + for 
(node_id, score) in fallback.into_iter().take(config.top_k) { result.paths.push(SearchPath::from_node(node_id, score)); } } diff --git a/rust/src/retrieval/search/greedy.rs b/rust/src/retrieval/search/greedy.rs index 812cf5be..34ed0de5 100644 --- a/rust/src/retrieval/search/greedy.rs +++ b/rust/src/retrieval/search/greedy.rs @@ -1,91 +1,37 @@ // Copyright (c) 2026 vectorless developers // SPDX-License-Identifier: Apache-2.0 -//! Greedy search algorithm with Pilot integration. +//! Pure Pilot search — LLM-guided single-path tree navigation. //! -//! Simple depth-first search that always follows the highest-scoring child. -//! When a Pilot is provided, it can provide semantic guidance at decision points. +//! At each layer, the Pilot scores all children and picks the top-1. +//! This is the most accurate (but slowest) approach: one LLM call per layer. +//! Falls back to NodeScorer when Pilot is unavailable. use async_trait::async_trait; -use tracing::{debug, trace}; +use std::collections::HashSet; +use tracing::debug; use super::super::RetrievalContext; use super::super::types::{NavigationDecision, NavigationStep, SearchPath}; -use super::scorer::{NodeScorer, ScoringContext}; +use super::pilot_scorer::{PilotDecisionCache, score_candidates}; use super::{SearchConfig, SearchResult, SearchTree}; use crate::document::{DocumentTree, NodeId}; -use crate::retrieval::pilot::{Pilot, SearchState}; +use crate::retrieval::pilot::Pilot; -/// Greedy search - always follows the best single path. +/// Pure Pilot search — Pilot picks the best child at each layer. /// -/// Fast but may miss relevant content in other branches. -/// When a Pilot is provided, it can guide the search at key decision points. -pub struct GreedySearch; +/// beam=1: at each level, Pilot evaluates all children and the search +/// follows only the top-ranked one. When Pilot is unavailable, +/// falls back to NodeScorer (keyword/BM25). 
+pub struct PurePilotSearch; -impl GreedySearch { - /// Create a new greedy search. +impl PurePilotSearch { + /// Create a new Pure Pilot search. pub fn new() -> Self { Self } - /// Create a scorer for the given query. - fn create_scorer(&self, query: &str) -> NodeScorer { - NodeScorer::new(ScoringContext::new(query)) - } - - /// Score candidates using a query-specific scorer. - fn score_candidates_with_query( - &self, - tree: &DocumentTree, - candidates: &[NodeId], - query: &str, - ) -> Vec<(NodeId, f32)> { - let scorer = self.create_scorer(query); - scorer.score_and_sort(tree, candidates) - } - - /// Merge algorithm scores with Pilot decision. - fn merge_with_pilot_decision( - &self, - tree: &DocumentTree, - candidates: &[NodeId], - pilot_decision: &crate::retrieval::pilot::PilotDecision, - query: &str, - ) -> Vec<(NodeId, f32)> { - let scorer = self.create_scorer(query); - let alpha = 0.4; - let beta = 0.6 * pilot_decision.confidence; - - // Build a map from node_id to pilot score - let mut pilot_scores: std::collections::HashMap = - std::collections::HashMap::new(); - for ranked in &pilot_decision.ranked_candidates { - pilot_scores.insert(ranked.node_id, ranked.score); - } - - // Merge scores - let mut merged: Vec<(NodeId, f32)> = candidates - .iter() - .map(|&node_id| { - let algo_score = scorer.score(tree, node_id); - let pilot_score = pilot_scores.get(&node_id).copied().unwrap_or(0.0); - - let final_score = if beta > 0.0 { - (alpha * algo_score + beta * pilot_score) / (alpha + beta) - } else { - algo_score - }; - - (node_id, final_score) - }) - .collect(); - - merged.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); - - merged - } - - /// Core greedy search logic parameterized by start node. + /// Core search logic parameterized by start node. 
async fn search_impl( &self, tree: &DocumentTree, @@ -97,24 +43,22 @@ impl GreedySearch { let mut result = SearchResult::default(); let mut current_path = SearchPath::new(); let mut current_node = start_node; - let mut visited: std::collections::HashSet = std::collections::HashSet::new(); + let mut visited: HashSet = HashSet::new(); + let cache = PilotDecisionCache::new(); debug!( - "GreedySearch: query='{}', start_node={:?}, max_iterations={}, min_score={:.2}", + "PurePilotSearch: query='{}', start_node={:?}, max_iterations={}, min_score={:.2}", context.query, start_node, config.max_iterations, config.min_score ); - // Track Pilot interventions let mut pilot_interventions = 0; for iteration in 0..config.max_iterations { result.iterations = iteration + 1; - // Get children of current node let children = tree.children(current_node); if children.is_empty() { - // Leaf node - add to results current_path.leaf = Some(current_node); if !config.leaf_only || tree.is_leaf(current_node) { result.paths.push(current_path.clone()); @@ -122,48 +66,25 @@ impl GreedySearch { break; } - // ========== Pilot Integration Point ========== - let scored_children = if let Some(p) = pilot { - let state = SearchState::new( - tree, - &context.query, - ¤t_path.nodes, - &children, - &visited, - ); - - if p.should_intervene(&state) { - trace!( - "Pilot intervening at greedy decision point with {} candidates", - children.len() - ); - - match p.decide(&state).await { - decision => { - pilot_interventions += 1; - debug!( - "Pilot decision: confidence={}, direction={:?}", - decision.confidence, - std::mem::discriminant(&decision.direction) - ); - - self.merge_with_pilot_decision( - tree, - &children, - &decision, - &context.query, - ) - } - } - } else { - self.score_candidates_with_query(tree, &children, &context.query) - } - } else { - self.score_candidates_with_query(tree, &children, &context.query) - }; - // ============================================== + // Pilot as primary scorer 
(weight=1.0), NodeScorer as fallback. + // Always consult Pilot — no should_intervene guard. + let scored_children = score_candidates( + tree, + &children, + &context.query, + pilot, + ¤t_path.nodes, + &visited, + 1.0, // PurePilot: Pilot weight = 1.0 + Some(&cache), + ) + .await; + + if pilot.is_some() { + pilot_interventions += 1; + } - // Find the best child that meets minimum score + // Take only top-1 let mut best_child = None; let mut best_score = 0.0; @@ -178,7 +99,6 @@ impl GreedySearch { if let Some(child_id) = best_child { visited.insert(child_id); - // Record navigation step let child_node = tree.get(child_id); result.trace.push(NavigationStep { node_id: format!("{:?}", child_id), @@ -190,7 +110,6 @@ impl GreedySearch { depth: child_node.map(|n| n.depth).unwrap_or(0), }); - // Update path current_path = current_path.extend(child_id, best_score); current_node = child_id; result.nodes_visited += 1; @@ -199,7 +118,6 @@ impl GreedySearch { break; } } else { - // No good children found - add current path as result current_path.leaf = Some(current_node); if current_path.score > 0.0 { result.paths.push(current_path); @@ -209,19 +127,18 @@ impl GreedySearch { } result.pilot_interventions = pilot_interventions; - result } } -impl Default for GreedySearch { +impl Default for PurePilotSearch { fn default() -> Self { Self::new() } } #[async_trait] -impl SearchTree for GreedySearch { +impl SearchTree for PurePilotSearch { async fn search( &self, tree: &DocumentTree, @@ -246,6 +163,22 @@ impl SearchTree for GreedySearch { } fn name(&self) -> &'static str { - "greedy" + "pure_pilot" + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pure_pilot_creation() { + let _search = PurePilotSearch::new(); + } + + #[test] + fn test_pure_pilot_default() { + let search = PurePilotSearch::default(); + assert_eq!(search.name(), "pure_pilot"); } } diff --git a/rust/src/retrieval/search/mcts.rs b/rust/src/retrieval/search/mcts.rs index 667a0d28..9663d686 100644 
--- a/rust/src/retrieval/search/mcts.rs +++ b/rust/src/retrieval/search/mcts.rs @@ -1,19 +1,28 @@ // Copyright (c) 2026 vectorless developers // SPDX-License-Identifier: Apache-2.0 -//! Monte Carlo Tree Search (MCTS) algorithm with Pilot integration. +//! Monte Carlo Tree Search (MCTS) with Pilot-provided priors. //! -//! Balances exploration and exploitation using UCT formula. -//! When a Pilot is provided, it can provide semantic guidance at decision points. +//! Uses UCT (Upper Confidence Bound for Trees) to balance exploration +//! and exploitation. Pilot provides prior scores for the UCT formula, +//! and guides the simulation (rollout) phase. NodeScorer is the fallback +//! when Pilot is unavailable. +//! +//! # Async +//! +//! Both selection and simulation phases are async because Pilot.decide() +//! requires an LLM call. Pilot decisions are cached by (query, parent_node_id) +//! so repeated visits to the same node don't trigger redundant LLM calls. use async_trait::async_trait; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; +use tracing::debug; use super::super::RetrievalContext; use super::super::types::{NavigationDecision, NavigationStep, SearchPath}; -use super::scorer::NodeScorer; +use super::pilot_scorer::{PilotDecisionCache, score_candidates}; +use super::scorer::{NodeScorer, ScoringContext}; use super::{SearchConfig, SearchResult, SearchTree}; -use crate::config::StrategyConfig; use crate::document::{DocumentTree, NodeId}; use crate::retrieval::pilot::Pilot; @@ -26,12 +35,12 @@ struct NodeStats { total_score: f32, } -/// Monte Carlo Tree Search implementation. +/// MCTS search with Pilot integration. /// -/// Uses UCT (Upper Confidence Bound for Trees) to balance -/// exploration of new paths with exploitation of promising ones. +/// Pilot provides prior scores that seed the UCT formula. This gives +/// MCTS semantic guidance while preserving the exploration/exploitation +/// balance. 
NodeScorer is used as fallback when Pilot is unavailable. pub struct MctsSearch { - scorer: NodeScorer, /// Exploration constant for UCT. exploration_weight: f32, } @@ -39,14 +48,8 @@ pub struct MctsSearch { impl MctsSearch { /// Create a new MCTS search. pub fn new() -> Self { - Self::with_config(&StrategyConfig::default()) - } - - /// Create with configuration. - pub fn with_config(config: &StrategyConfig) -> Self { Self { - scorer: NodeScorer::new(Default::default()), - exploration_weight: config.exploration_weight, + exploration_weight: 1.414, // sqrt(2), classic UCT default } } @@ -57,9 +60,11 @@ impl MctsSearch { } /// Calculate UCT score for a child node. + /// + /// `prior_score` comes from Pilot (or NodeScorer fallback). fn uct_score(&self, child_stats: &NodeStats, parent_visits: usize, prior_score: f32) -> f32 { if child_stats.visits == 0 { - // Unvisited nodes get high priority + // Unvisited nodes get high priority + prior bonus return f32::INFINITY; } @@ -67,16 +72,23 @@ impl MctsSearch { let exploration = self.exploration_weight * (parent_visits as f32).ln().sqrt() / child_stats.visits as f32; - // Combine with prior score from scorer + // Blend exploitation with Pilot prior 0.5 * (exploitation + prior_score) + exploration } - /// Select best child using UCT. - fn select_child( + /// Select best child using UCT with Pilot priors. + /// + /// When Pilot is available, fetches priors via the cache. + /// Falls back to NodeScorer when Pilot is unavailable. 
+ async fn select_child( &self, tree: &DocumentTree, + context: &RetrievalContext, node_id: NodeId, stats: &HashMap, + pilot: Option<&dyn Pilot>, + cache: &PilotDecisionCache, + visited: &HashSet, ) -> Option<(NodeId, f32)> { let children = tree.children(node_id); if children.is_empty() { @@ -86,28 +98,66 @@ impl MctsSearch { let parent_stats = stats.get(&node_id).cloned().unwrap_or_default(); let parent_visits = parent_stats.visits.max(1); + // Get Pilot priors for all children (cached) + let priors = score_candidates( + tree, + &children, + &context.query, + pilot, + &[node_id], // simplified path for UCT context + visited, + 0.5, // MCTS prior: balanced Pilot/Scorer + Some(cache), + ) + .await; + + // Build prior map + let prior_map: HashMap = priors.into_iter().collect(); + let mut best_child = None; let mut best_score = f32::NEG_INFINITY; for &child_id in &children { - let prior_score = self.scorer.score(tree, child_id); + let prior = prior_map.get(&child_id).copied().unwrap_or_else(|| { + let scorer = NodeScorer::new(ScoringContext::new(&context.query)); + scorer.score(tree, child_id) + }); let child_stats = stats.get(&child_id).cloned().unwrap_or_default(); - let uct = self.uct_score(&child_stats, parent_visits, prior_score); + let uct = self.uct_score(&child_stats, parent_visits, prior); if uct > best_score { best_score = uct; - best_child = Some((child_id, prior_score)); + best_child = Some((child_id, prior)); } } best_child } - /// Simulate a random rollout from a node. - fn simulate(&self, tree: &DocumentTree, node_id: NodeId, max_depth: usize) -> f32 { + /// Simulate a rollout from a node using Pilot-guided greedy descent. + /// + /// When Pilot is available, each layer picks the top-1 Pilot-scored child. + /// Falls back to NodeScorer when Pilot is unavailable. 
+ async fn simulate( + &self, + tree: &DocumentTree, + context: &RetrievalContext, + node_id: NodeId, + max_depth: usize, + pilot: Option<&dyn Pilot>, + cache: &PilotDecisionCache, + visited: &HashSet, + ) -> f32 { let mut current = node_id; let mut depth = 0; - let mut total_score = self.scorer.score(tree, current); + let mut path = vec![node_id]; + let mut total_score = 0.0f32; + let mut count = 0; + + // Initial score + let scorer = NodeScorer::new(ScoringContext::new(&context.query)); + total_score += scorer.score(tree, current); + count += 1; while depth < max_depth { let children = tree.children(current); @@ -115,18 +165,31 @@ impl MctsSearch { break; } - // Random selection (or use scorer for semi-random) - let scored = self.scorer.score_and_sort(tree, &children); - if let Some((child_id, score)) = scored.first() { + // Use Pilot for greedy descent (cached) + let scored = score_candidates( + tree, + &children, + &context.query, + pilot, + &path, + visited, + 0.5, // MCTS simulation: balanced + Some(cache), + ) + .await; + + if let Some(&(child_id, score)) = scored.first() { total_score += score; - current = *child_id; + path.push(child_id); + current = child_id; } else { break; } depth += 1; + count += 1; } - total_score / (depth + 1).max(1) as f32 + total_score / count.max(1) as f32 } /// Backpropagate score up the tree. @@ -137,43 +200,48 @@ impl MctsSearch { node_stats.total_score += score; } } -} - -impl Default for MctsSearch { - fn default() -> Self { - Self::new() - } -} -#[async_trait] -impl SearchTree for MctsSearch { - async fn search( + /// Core MCTS logic parameterized by start node. 
+ async fn search_impl( &self, tree: &DocumentTree, context: &RetrievalContext, config: &SearchConfig, - _pilot: Option<&dyn Pilot>, + pilot: Option<&dyn Pilot>, + start_node: NodeId, ) -> SearchResult { - // Note: Pilot integration for MCTS can be added in Phase 2 - // For now, we keep the original behavior let mut result = SearchResult::default(); let mut stats: HashMap = HashMap::new(); - let root = tree.root(); + let cache = PilotDecisionCache::new(); + let visited: HashSet = HashSet::new(); // Initialize root stats - stats.insert(root, NodeStats::default()); + stats.insert(start_node, NodeStats::default()); + + debug!( + "MctsSearch: query='{}', start_node={:?}, max_iterations={}, exploration={:.2}", + context.query, start_node, config.max_iterations, self.exploration_weight + ); + + let mut pilot_interventions = 0; for iteration in 0..config.max_iterations { result.iterations = iteration + 1; - // Selection phase - traverse tree using UCT - let mut path = vec![root]; - let mut current = root; + // === Selection phase: traverse tree using UCT === + let mut path = vec![start_node]; + let mut current = start_node; while !tree.is_leaf(current) { - if let Some((child_id, score)) = self.select_child(tree, current, &stats) { + if let Some((child_id, _score)) = self + .select_child(tree, context, current, &stats, pilot, &cache, &visited) + .await + { path.push(child_id); current = child_id; + if pilot.is_some() { + pilot_interventions += 1; + } } else { break; } @@ -181,14 +249,20 @@ impl SearchTree for MctsSearch { result.nodes_visited += path.len(); - // Simulation phase - random rollout - let leaf = *path.last().unwrap_or(&root); - let sim_score = self.simulate(tree, leaf, 5); + // === Simulation phase: Pilot-guided rollout === + let leaf = *path.last().unwrap_or(&start_node); + let sim_score = self + .simulate(tree, context, leaf, 5, pilot, &cache, &visited) + .await; + + if pilot.is_some() { + pilot_interventions += 1; + } - // Backpropagation phase + // === 
Backpropagation phase === self.backpropagate(&mut stats, &path, sim_score); - // Record trace for the last node in path + // Record trace for the last node if let Some(&last_id) = path.last() { let node = tree.get(last_id); result.trace.push(NavigationStep { @@ -200,67 +274,139 @@ impl SearchTree for MctsSearch { }); } - // Check if we have enough visits to extract paths + // Periodically extract paths (every 10 iterations) if iteration > 0 && iteration % 10 == 0 { - // Extract best paths from visited nodes - let root_children = tree.children(root); - let mut scored_children: Vec<_> = root_children - .iter() - .filter_map(|&child_id| { - stats.get(&child_id).map(|s| { - let avg_score = if s.visits > 0 { - s.total_score / s.visits as f32 - } else { - 0.0 - }; - (child_id, avg_score) - }) - }) - .collect(); - - scored_children - .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); - - for (child_id, score) in scored_children.iter().take(config.top_k) { - if *score >= config.min_score { - result.paths.push(SearchPath::from_node(*child_id, *score)); - } - } + self.extract_paths( + tree, + start_node, + &stats, + config.min_score, + config.top_k, + &mut result, + ); } } // Final extraction of best paths + self.extract_paths( + tree, + start_node, + &stats, + config.min_score, + config.top_k, + &mut result, + ); + + result.pilot_interventions = pilot_interventions; + result + } + + /// Extract best paths from MCTS statistics. 
+ fn extract_paths( + &self, + tree: &DocumentTree, + root: NodeId, + stats: &HashMap, + min_score: f32, + top_k: usize, + result: &mut SearchResult, + ) { let root_children = tree.children(root); - let mut final_paths: Vec<_> = root_children + let mut scored_children: Vec<_> = root_children .iter() .filter_map(|&child_id| { stats.get(&child_id).map(|s| { let avg_score = if s.visits > 0 { s.total_score / s.visits as f32 } else { - self.scorer.score(tree, child_id) + 0.0 }; - SearchPath::from_node(child_id, avg_score) + (child_id, avg_score) }) }) .collect(); - final_paths.sort_by(|a, b| { - b.score - .partial_cmp(&a.score) - .unwrap_or(std::cmp::Ordering::Equal) - }); - final_paths.truncate(config.top_k); + scored_children.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); - result.paths = final_paths + // Clear existing paths and re-extract + result.paths = scored_children .into_iter() - .filter(|p| p.score >= config.min_score) + .filter(|(_, score)| *score >= min_score) + .take(top_k) + .map(|(node_id, score)| SearchPath::from_node(node_id, score)) .collect(); + } +} - result +impl Default for MctsSearch { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl SearchTree for MctsSearch { + async fn search( + &self, + tree: &DocumentTree, + context: &RetrievalContext, + config: &SearchConfig, + pilot: Option<&dyn Pilot>, + ) -> SearchResult { + self.search_impl(tree, context, config, pilot, tree.root()) + .await + } + + async fn search_from( + &self, + tree: &DocumentTree, + context: &RetrievalContext, + config: &SearchConfig, + pilot: Option<&dyn Pilot>, + start_node: NodeId, + ) -> SearchResult { + self.search_impl(tree, context, config, pilot, start_node) + .await } fn name(&self) -> &'static str { "mcts" } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mcts_creation() { + let search = MctsSearch::new(); + assert!((search.exploration_weight - 1.414).abs() < 0.01); + } + + #[test] + fn 
test_mcts_custom_exploration() { + let search = MctsSearch::new().with_exploration(2.0); + assert!((search.exploration_weight - 2.0).abs() < 0.01); + } + + #[test] + fn test_uct_unvisited() { + let search = MctsSearch::new(); + let stats = NodeStats::default(); + let score = search.uct_score(&stats, 10, 0.5); + assert!(score.is_infinite()); + } + + #[test] + fn test_uct_visited() { + let search = MctsSearch::new(); + let stats = NodeStats { + visits: 5, + total_score: 3.0, + }; + let score = search.uct_score(&stats, 20, 0.8); + assert!(score.is_finite()); + assert!(score > 0.0); + } +} diff --git a/rust/src/retrieval/search/mod.rs b/rust/src/retrieval/search/mod.rs index 39e78cc3..cceec5e4 100644 --- a/rust/src/retrieval/search/mod.rs +++ b/rust/src/retrieval/search/mod.rs @@ -7,12 +7,14 @@ mod beam; mod bm25; mod greedy; mod mcts; +mod pilot_scorer; mod scorer; mod toc_navigator; mod r#trait; pub use beam::BeamSearch; pub use bm25::{Bm25Engine, Bm25Params, FieldDocument, STOPWORDS, extract_keywords}; -pub use greedy::GreedySearch; +pub use greedy::PurePilotSearch; +pub use mcts::MctsSearch; pub use toc_navigator::{SearchCue, ToCNavigator}; pub use r#trait::{SearchConfig, SearchResult, SearchTree}; diff --git a/rust/src/retrieval/search/pilot_scorer.rs b/rust/src/retrieval/search/pilot_scorer.rs new file mode 100644 index 00000000..22db9805 --- /dev/null +++ b/rust/src/retrieval/search/pilot_scorer.rs @@ -0,0 +1,189 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Shared Pilot-as-primary scoring helper. +//! +//! All three search algorithms (PurePilot, Beam, MCTS) use this module +//! to score child candidates. Pilot is the primary scorer; NodeScorer +//! provides a fallback when Pilot is unavailable or budget is exhausted. +//! +//! # Caching +//! +//! Pilot decisions are cached by `(query, parent_node_id)` to avoid +//! redundant LLM calls when the same node is revisited (e.g. MCTS +//! 
selection phase revisits a node multiple times). + +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; +use tokio::sync::Mutex; + +use super::scorer::{NodeScorer, ScoringContext}; +use crate::document::{DocumentTree, NodeId}; +use crate::retrieval::pilot::{Pilot, PilotDecision, SearchState}; + +/// Cache key: (query_fingerprint, parent_node_id). +type CacheKey = (u64, NodeId); + +/// Shared Pilot decision cache. +/// +/// Thread-safe, query-scoped cache that stores Pilot decisions keyed by +/// (query hash, parent node ID). Prevents redundant LLM calls when the +/// same (query, node) pair is scored multiple times (common in MCTS). +#[derive(Debug, Clone, Default)] +pub struct PilotDecisionCache { + inner: Arc>>, +} + +impl PilotDecisionCache { + /// Create a new empty cache. + pub fn new() -> Self { + Self::default() + } + + /// Compute cache key from query and parent node. + fn cache_key(query: &str, parent: NodeId) -> CacheKey { + use std::hash::{Hash, Hasher}; + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + query.hash(&mut hasher); + (hasher.finish(), parent) + } + + /// Try to get a cached decision. + pub async fn get(&self, query: &str, parent: NodeId) -> Option { + let key = Self::cache_key(query, parent); + let cache = self.inner.lock().await; + cache.get(&key).cloned() + } + + /// Store a decision in the cache. + pub async fn put(&self, query: &str, parent: NodeId, decision: &PilotDecision) { + let key = Self::cache_key(query, parent); + let mut cache = self.inner.lock().await; + cache.entry(key).or_insert_with(|| decision.clone()); + } + + /// Clear the cache. + pub async fn clear(&self) { + self.inner.lock().await.clear(); + } +} + +/// Score child candidates using Pilot as primary, NodeScorer as fallback. +/// +/// Pilot decisions are cached by (query, parent_node_id). Subsequent calls +/// with the same arguments return cached results without LLM calls. 
+/// +/// `pilot_weight` controls how much Pilot vs NodeScorer contributes: +/// - 1.0 = PurePilot (only Pilot scores matter) +/// - 0.7 = Beam (Pilot dominant, NodeScorer as secondary) +/// - 0.5 = MCTS prior (balanced) +pub async fn score_candidates( + tree: &DocumentTree, + candidates: &[NodeId], + query: &str, + pilot: Option<&dyn Pilot>, + path: &[NodeId], + visited: &HashSet, + pilot_weight: f32, + cache: Option<&PilotDecisionCache>, +) -> Vec<(NodeId, f32)> { + if candidates.is_empty() { + return Vec::new(); + } + + // If no Pilot, pure NodeScorer + let Some(p) = pilot else { + return score_with_scorer(tree, candidates, query); + }; + + if !p.is_active() { + return score_with_scorer(tree, candidates, query); + } + + // Determine parent node (last in path) for cache key + let parent = path.last().copied().unwrap_or(tree.root()); + + // Check cache first + let decision = if let Some(c) = cache { + if let Some(cached) = c.get(query, parent).await { + tracing::trace!("Pilot cache hit for parent={:?}", parent); + cached + } else { + let state = SearchState::new(tree, query, path, candidates, visited); + let d = p.decide(&state).await; + c.put(query, parent, &d).await; + d + } + } else { + let state = SearchState::new(tree, query, path, candidates, visited); + p.decide(&state).await + }; + + // Build Pilot score map + let mut pilot_scores: HashMap = HashMap::new(); + for ranked in &decision.ranked_candidates { + pilot_scores.insert(ranked.node_id, ranked.score); + } + + // Compute NodeScorer fallback scores + let scorer_weight = 1.0 - pilot_weight; + let confidence = decision.confidence; + let effective_pilot = pilot_weight * confidence; + + let scorer = NodeScorer::new(ScoringContext::new(query)); + + let mut scored: Vec<(NodeId, f32)> = candidates + .iter() + .map(|&node_id| { + let algo_score = scorer.score(tree, node_id); + let p_score = pilot_scores.get(&node_id).copied().unwrap_or(0.0); + + let final_score = if effective_pilot > 0.0 && 
pilot_scores.contains_key(&node_id) { + (effective_pilot * p_score + scorer_weight * algo_score) + / (effective_pilot + scorer_weight) + } else { + algo_score + }; + + (node_id, final_score) + }) + .collect(); + + scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + scored +} + +/// Pure NodeScorer fallback. +fn score_with_scorer( + tree: &DocumentTree, + candidates: &[NodeId], + query: &str, +) -> Vec<(NodeId, f32)> { + let scorer = NodeScorer::new(ScoringContext::new(query)); + scorer.score_and_sort(tree, candidates) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::document::TreeNode; + use indextree::Arena; + + /// Helper to create a NodeId from an Arena for tests. + fn make_node_id(arena: &mut Arena) -> NodeId { + NodeId(arena.new_node(TreeNode::default())) + } + + #[test] + fn test_cache_key_deterministic() { + let mut arena = Arena::new(); + let nid = make_node_id(&mut arena); + + let key1 = PilotDecisionCache::cache_key("hello", nid); + let key2 = PilotDecisionCache::cache_key("hello", nid); + assert_eq!(key1, key2); + + let key3 = PilotDecisionCache::cache_key("world", nid); + assert_ne!(key1, key3); + } +} diff --git a/rust/src/retrieval/search/toc_navigator.rs b/rust/src/retrieval/search/toc_navigator.rs index ae156a21..778b5da2 100644 --- a/rust/src/retrieval/search/toc_navigator.rs +++ b/rust/src/retrieval/search/toc_navigator.rs @@ -181,13 +181,10 @@ impl ToCNavigator { for &node_id in top_level_nodes { if let Some(node) = tree.get(node_id) { - let text = format!("{} {} {}", node.title, node.summary, node.content) - .to_lowercase(); + let text = + format!("{} {} {}", node.title, node.summary, node.content).to_lowercase(); - let match_count = query_words - .iter() - .filter(|w| text.contains(*w)) - .count(); + let match_count = query_words.iter().filter(|w| text.contains(*w)).count(); let mut score = if query_words.is_empty() { 0.0 @@ -234,10 +231,7 @@ impl ToCNavigator { return Vec::new(); } - 
scored.sort_by(|a, b| { - b.1.partial_cmp(&a.1) - .unwrap_or(std::cmp::Ordering::Equal) - }); + scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); scored.truncate(self.max_branches); if !scored.is_empty() { @@ -367,7 +361,9 @@ Rules: } if cues.is_empty() { - warn!("LLM refinement returned no valid candidates, falling back to summary matching"); + warn!( + "LLM refinement returned no valid candidates, falling back to summary matching" + ); let summary_cues = self.match_by_summary(query, tree, top_level_nodes); if summary_cues.is_empty() { return vec![SearchCue { @@ -386,7 +382,10 @@ Rules: cues } Err(e) => { - warn!("LLM refinement failed: {}, falling back to summary matching", e); + warn!( + "LLM refinement failed: {}, falling back to summary matching", + e + ); // Don't fall directly to root — try summary matching first let summary_cues = self.match_by_summary(query, tree, top_level_nodes); if summary_cues.is_empty() { diff --git a/rust/src/retrieval/stages/analyze.rs b/rust/src/retrieval/stages/analyze.rs index c3928574..34d93352 100644 --- a/rust/src/retrieval/stages/analyze.rs +++ b/rust/src/retrieval/stages/analyze.rs @@ -144,8 +144,12 @@ impl AnalyzeStage { self } - /// Enable query decomposition with LLM client. + /// Enable query decomposition and LLM-based complexity detection. pub fn with_llm_client(mut self, client: crate::llm::LlmClient) -> Self { + // Use LLM client for complexity detection + self.complexity_detector = + ComplexityDetector::with_llm_client(client.clone()); + // Also enable query decomposition if self.query_decomposer.is_none() { self.query_decomposer = Some(QueryDecomposer::new(DecompositionConfig::default()).with_llm_client(client)); @@ -356,8 +360,8 @@ impl RetrievalStage for AnalyzeStage { async fn execute(&self, ctx: &mut PipelineContext) -> crate::error::Result { info!("Analyzing query: '{}'", ctx.query); - // 1. 
Detect complexity - ctx.complexity = Some(self.complexity_detector.detect(&ctx.query)); + // 1. Detect complexity (LLM-based when available, heuristic fallback) + ctx.complexity = Some(self.complexity_detector.detect(&ctx.query).await); info!("Query complexity: {:?}", ctx.complexity); // 2. Extract keywords diff --git a/rust/src/retrieval/stages/evaluate.rs b/rust/src/retrieval/stages/evaluate.rs index 972f9667..d0d51bef 100644 --- a/rust/src/retrieval/stages/evaluate.rs +++ b/rust/src/retrieval/stages/evaluate.rs @@ -137,7 +137,8 @@ impl EvaluateStage { let node_content = self.build_node_content(&ctx.tree, candidate.node_id); // Cache for build_response reuse - ctx.node_content_cache.insert(candidate.node_id, node_content.clone()); + ctx.node_content_cache + .insert(candidate.node_id, node_content.clone()); // Add to aggregated content if !node_content.is_empty() { @@ -252,11 +253,7 @@ impl EvaluateStage { _ => { // Cache miss (edge case): compute inline let built = self.build_node_content(&ctx.tree, candidate.node_id); - if built.is_empty() { - None - } else { - Some(built) - } + if built.is_empty() { None } else { Some(built) } } } } else { diff --git a/rust/src/retrieval/stages/plan.rs b/rust/src/retrieval/stages/plan.rs index 1f3b9cba..4442551c 100644 --- a/rust/src/retrieval/stages/plan.rs +++ b/rust/src/retrieval/stages/plan.rs @@ -5,7 +5,7 @@ //! //! This stage selects: //! - Retrieval strategy (Keyword/Semantic/LLM) -//! - Search algorithm (Greedy/Beam/MCTS) +//! - Search algorithm (PurePilot/Beam/MCTS) //! 
- Search configuration use async_trait::async_trait; @@ -121,17 +121,16 @@ impl PlanStage { let algorithm = match complexity { QueryComplexity::Simple => { - // Simple queries can use greedy search - SearchAlgorithm::Greedy + // Simple queries: PurePilot (beam=1, fast) + SearchAlgorithm::PurePilot } QueryComplexity::Medium => { - // Medium queries benefit from beam search + // Medium queries: Beam search SearchAlgorithm::Beam } QueryComplexity::Complex => { - // Complex queries may benefit from MCTS - // But for now, use beam search as MCTS is more expensive - SearchAlgorithm::Beam + // Complex queries: MCTS for thorough exploration + SearchAlgorithm::Mcts } }; @@ -144,7 +143,7 @@ impl PlanStage { let complexity = ctx.complexity.unwrap_or(QueryComplexity::Medium); let (beam_width, max_depth) = match complexity { - QueryComplexity::Simple => (1, 5), // Greedy-like + QueryComplexity::Simple => (1, 5), // PurePilot-like QueryComplexity::Medium => (ctx.options.beam_width, 10), QueryComplexity::Complex => (ctx.options.beam_width + 2, 15), }; @@ -188,6 +187,20 @@ impl RetrievalStage for PlanStage { // 3. Build search config ctx.search_config = Some(self.build_search_config(ctx)); + // 4. Build fallback chain: primary algorithm first, then alternatives + // The chain determines which algorithms to try if the primary + // doesn't produce results above min_score. 
+ let primary = ctx.selected_algorithm.unwrap_or(SearchAlgorithm::Beam); + let mut chain = vec![primary]; + for name in &ctx.options.fallback_chain { + if let Some(algo) = SearchAlgorithm::from_name(name) { + if algo != primary { + chain.push(algo); + } + } + } + ctx.search_fallback_chain = chain; + info!( "Plan complete: strategy={:?}, algorithm={:?}, beam_width={}", ctx.selected_strategy, diff --git a/rust/src/retrieval/stages/search.rs b/rust/src/retrieval/stages/search.rs index fcef9052..8f431dec 100644 --- a/rust/src/retrieval/stages/search.rs +++ b/rust/src/retrieval/stages/search.rs @@ -23,7 +23,8 @@ use crate::retrieval::pipeline::{ }; use crate::retrieval::search::extract_keywords; use crate::retrieval::search::{ - BeamSearch, GreedySearch, SearchConfig as SearchAlgConfig, SearchCue, SearchTree, ToCNavigator, + BeamSearch, MctsSearch, PurePilotSearch, SearchConfig as SearchAlgConfig, SearchCue, + SearchTree, ToCNavigator, }; use crate::retrieval::strategy::{ HybridConfig, HybridStrategy, KeywordStrategy, LlmStrategy, RetrievalStrategy, @@ -211,14 +212,106 @@ impl SearchStage { vec![ctx.query.clone()] } - /// Run search across all queries and cues, collecting and deduplicating results. + /// Run search across the fallback chain. + /// + /// Iterates through algorithms in the fallback chain. After each algorithm, + /// checks if the best candidate score meets `min_score`. If sufficient, + /// returns early. Otherwise tries the next algorithm in the chain. 
async fn run_search( &self, ctx: &mut PipelineContext, queries: &[String], cues: &[SearchCue], ) -> (Vec, Vec) { - let algorithm = ctx.selected_algorithm.unwrap_or(SearchAlgorithm::Beam); + let config = ctx.search_config.clone().unwrap_or_default(); + let min_score = config.min_score; + + // Build fallback chain: primary algorithm first, then remaining from chain + let primary = ctx.selected_algorithm.unwrap_or(SearchAlgorithm::Beam); + let chain = &ctx.search_fallback_chain; + + // Build ordered algorithm list: primary first, then chain (excluding primary) + let mut algorithms = vec![primary]; + for &algo in chain { + if algo != primary { + algorithms.push(algo); + } + } + + info!( + "Search fallback chain: {:?} (min_score={:.2})", + algorithms.iter().map(|a| a.name()).collect::>(), + min_score + ); + + let mut best_paths = Vec::new(); + let mut best_candidates = Vec::new(); + let mut total_pilot_interventions = 0u64; + + for (idx, &algorithm) in algorithms.iter().enumerate() { + let (paths, candidates) = self + .run_single_algorithm(ctx, queries, cues, algorithm) + .await; + + // Accumulate pilot interventions + total_pilot_interventions += paths.len() as u64; // approximate + + // Merge results: collect all paths and candidates across fallback rounds + best_paths.extend(paths); + best_candidates.extend(candidates); + + // Check if best candidate meets the threshold + let best_score = best_candidates + .iter() + .map(|c| c.score) + .fold(0.0f32, f32::max); + + if best_score >= min_score { + info!( + "Algorithm {} (#{}) sufficient: best_score={:.3} >= min_score={:.3}", + algorithm.name(), + idx + 1, + best_score, + min_score + ); + break; + } + + info!( + "Algorithm {} (#{}) insufficient: best_score={:.3} < min_score={:.3}, trying next", + algorithm.name(), + idx + 1, + best_score, + min_score + ); + } + + // Deduplicate candidates by node_id, keeping highest score + best_candidates.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + 
.unwrap_or(std::cmp::Ordering::Equal) + }); + best_candidates.dedup_by(|a, b| a.node_id == b.node_id); + + info!( + "Search complete: {} paths, {} candidates (pilot interventions: {})", + best_paths.len(), + best_candidates.len(), + total_pilot_interventions + ); + + (best_paths, best_candidates) + } + + /// Run a single search algorithm across all queries and cues. + async fn run_single_algorithm( + &self, + ctx: &mut PipelineContext, + queries: &[String], + cues: &[SearchCue], + algorithm: SearchAlgorithm, + ) -> (Vec, Vec) { let config = ctx.search_config.clone().unwrap_or_default(); let search_config = SearchAlgConfig { @@ -232,7 +325,6 @@ impl SearchStage { let pilot_ref: Option<&dyn Pilot> = self.pilot.as_deref(); let mut all_paths = Vec::new(); - let mut total_pilot_interventions = 0u64; for query in queries { let legacy_ctx = @@ -240,13 +332,16 @@ impl SearchStage { for cue in cues { debug!( - "Searching: algorithm={:?}, query='{}', cue.root={:?}, cue.confidence={:.3}", - algorithm, query, cue.root, cue.confidence + "Searching: algorithm={}, query='{}', cue.root={:?}, cue.confidence={:.3}", + algorithm.name(), + query, + cue.root, + cue.confidence ); let result = match algorithm { - SearchAlgorithm::Greedy => { - GreedySearch::new() + SearchAlgorithm::PurePilot => { + PurePilotSearch::new() .search_from( &ctx.tree, &legacy_ctx, @@ -267,9 +362,8 @@ impl SearchStage { ) .await } - // MCTS is not truly implemented — falls back to Beam behavior. 
SearchAlgorithm::Mcts => { - BeamSearch::new() + MctsSearch::new() .search_from( &ctx.tree, &legacy_ctx, @@ -282,28 +376,11 @@ impl SearchStage { }; all_paths.extend(result.paths); - total_pilot_interventions += result.pilot_interventions as u64; } } - let mut all_candidates = self.extract_candidates(&all_paths, &ctx.tree); - - // Deduplicate by node_id, keeping the highest-scored entry - all_candidates.sort_by(|a, b| { - b.score - .partial_cmp(&a.score) - .unwrap_or(std::cmp::Ordering::Equal) - }); - all_candidates.dedup_by(|a, b| a.node_id == b.node_id); - - info!( - "Search complete: {} paths, {} candidates (pilot interventions: {})", - all_paths.len(), - all_candidates.len(), - total_pilot_interventions - ); - - (all_paths, all_candidates) + let candidates = self.extract_candidates(&all_paths, &ctx.tree); + (all_paths, candidates) } /// Check if a query is asking for a document summary/overview. @@ -333,7 +410,9 @@ impl SearchStage { // Phrase patterns — match with intervening words removed. 
// "what is this project about" → remove common filler words, check for "what is this about" - let filler_words = ["project", "document", "file", "paper", "article", "text", "book", "the", "a", "an"]; + let filler_words = [ + "project", "document", "file", "paper", "article", "text", "book", "the", "a", "an", + ]; let cleaned: String = lower .split_whitespace() .filter(|w| !filler_words.contains(w)) @@ -597,10 +676,9 @@ impl RetrievalStage for SearchStage { for cue in &mut cues { if let Some(node) = ctx.tree.get(cue.root) { let node_path = node.title.as_str(); - if let Some((_, cached_conf)) = l2_paths - .iter() - .find(|(path, _)| node_path.contains(path.as_str()) || path.contains(node_path)) - { + if let Some((_, cached_conf)) = l2_paths.iter().find(|(path, _)| { + node_path.contains(path.as_str()) || path.contains(node_path) + }) { // Blend current confidence with historical: 60% current + 40% cached cue.confidence = cue.confidence * 0.6 + cached_conf * 0.4; debug!( @@ -692,9 +770,7 @@ impl RetrievalStage for SearchStage { for candidate in &mut ctx.candidates { if let Some(node) = ctx.tree.get(candidate.node_id) { let content_fp = crate::utils::fingerprint::Fingerprint::from_str(&node.content); - if let Some((cached_score, _strategy)) = - ctx.reasoning_cache.l3_get(&content_fp) - { + if let Some((cached_score, _strategy)) = ctx.reasoning_cache.l3_get(&content_fp) { // Blend: if L3 has a higher score for this node, boost it if cached_score > candidate.score { candidate.score = (candidate.score + cached_score) / 2.0; diff --git a/rust/src/retrieval/strategy/cross_document.rs b/rust/src/retrieval/strategy/cross_document.rs index 40871057..c296ec24 100644 --- a/rust/src/retrieval/strategy/cross_document.rs +++ b/rust/src/retrieval/strategy/cross_document.rs @@ -252,9 +252,7 @@ impl CrossDocumentStrategy { .collect(); for node_id in high_score_nodes { - let depth_results = self - .search_subtree(&doc.tree, node_id, context, 0, 2) - .await; + let depth_results = 
self.search_subtree(&doc.tree, node_id, context, 0, 2).await; scored_nodes.extend(depth_results); } @@ -289,44 +287,43 @@ impl CrossDocumentStrategy { context: &'a RetrievalContext, current_depth: usize, max_depth: usize, - ) -> std::pin::Pin> + Send + 'a>> { + ) -> std::pin::Pin< + Box> + Send + 'a>, + > { Box::pin(async move { - if current_depth >= max_depth { - return Vec::new(); - } + if current_depth >= max_depth { + return Vec::new(); + } - let children = tree.children(parent_id); - if children.is_empty() { - return Vec::new(); - } + let children = tree.children(parent_id); + if children.is_empty() { + return Vec::new(); + } - let evaluations = self - .inner - .evaluate_nodes(tree, &children, context) - .await; + let evaluations = self.inner.evaluate_nodes(tree, &children, context).await; - let mut results = Vec::new(); - let mut explore_further = Vec::new(); + let mut results = Vec::new(); + let mut explore_further = Vec::new(); - for (node_id, eval) in children.into_iter().zip(evaluations.into_iter()) { - if eval.score >= self.config.min_score { - results.push((node_id, eval.clone())); - } - // Only explore deeper if score is promising - if eval.score >= self.config.min_score * 1.5 { - explore_further.push(node_id); + for (node_id, eval) in children.into_iter().zip(evaluations.into_iter()) { + if eval.score >= self.config.min_score { + results.push((node_id, eval.clone())); + } + // Only explore deeper if score is promising + if eval.score >= self.config.min_score * 1.5 { + explore_further.push(node_id); + } } - } - // Recurse into promising children - for child_id in explore_further { - let deeper = self - .search_subtree(tree, child_id, context, current_depth + 1, max_depth) - .await; - results.extend(deeper); - } + // Recurse into promising children + for child_id in explore_further { + let deeper = self + .search_subtree(tree, child_id, context, current_depth + 1, max_depth) + .await; + results.extend(deeper); + } - results + results }) } diff --git 
a/rust/src/retrieval/strategy/llm.rs b/rust/src/retrieval/strategy/llm.rs index 41cd8987..e22b8b43 100644 --- a/rust/src/retrieval/strategy/llm.rs +++ b/rust/src/retrieval/strategy/llm.rs @@ -240,10 +240,7 @@ Rules: let toc = self.toc_view.generate_from(tree, node_ids[0]); let toc_markdown = self.toc_view.format_markdown(&toc); let toc_preview: String = toc_markdown.chars().take(800).collect(); - format!( - "\n\nDocument ToC:\n{}\n", - toc_preview - ) + format!("\n\nDocument ToC:\n{}\n", toc_preview) } else { String::new() }; diff --git a/rust/src/retrieval/types.rs b/rust/src/retrieval/types.rs index 1c99e79c..a559912c 100644 --- a/rust/src/retrieval/types.rs +++ b/rust/src/retrieval/types.rs @@ -125,6 +125,11 @@ pub struct RetrieveOptions { /// Cross-document graph for graph-aware retrieval boosting. pub document_graph: Option>, + + /// Search fallback chain: algorithm names tried in order until min_score is met. + /// Options: "beam", "mcts", "pure_pilot". + /// Default: ["beam", "mcts", "pure_pilot"] + pub fallback_chain: Vec, } impl Default for RetrieveOptions { @@ -145,6 +150,7 @@ impl Default for RetrieveOptions { use_async_context: false, streaming: false, document_graph: None, + fallback_chain: vec!["beam".into(), "mcts".into(), "pure_pilot".into()], } } } @@ -263,6 +269,16 @@ impl RetrieveOptions { self.document_graph = Some(graph); self } + + /// Set the search fallback chain. + /// + /// Algorithm names: "beam", "mcts", "pure_pilot". + /// Primary algorithm is prepended automatically by the Plan stage. + #[must_use] + pub fn with_fallback_chain(mut self, chain: Vec) -> Self { + self.fallback_chain = chain; + self + } } /// A single retrieval result. 
diff --git a/samples/083a0e39-5c92-404b-9fb7-8458152dd65f.bin b/samples/083a0e39-5c92-404b-9fb7-8458152dd65f.bin new file mode 100644 index 00000000..d8b3841e --- /dev/null +++ b/samples/083a0e39-5c92-404b-9fb7-8458152dd65f.bin @@ -0,0 +1 @@ +{"version":1,"checksum":"df9576fe0cb0f42948a619b0352f6fbd3c647258317ff2ffe5b170baec6eb302","payload":{"meta":{"content_fingerprint":"zsbpiOWjNlqXOyJuG/CAgQ==","created_at":"2026-04-13T15:19:05.175062664Z","description":"","format":"pdf","id":"083a0e39-5c92-404b-9fb7-8458152dd65f","line_count":null,"logic_fingerprint":"4p/tkAx4Dcrk805539ue0Q==","modified_at":"2026-04-13T15:19:05.178281613Z","name":"Docker_Cheat_Sheet","node_count":7,"page_count":null,"processing_duration_ms":99141,"processing_version":0,"source_path":"/home/ztgx/Desktop/vectorless/samples/Docker_Cheat_Sheet.pdf","total_summary_tokens":378},"pages":[],"reasoning_index":{"config_hash":0,"hot_nodes":{},"section_map":{"1":{"index1":2,"stamp":0},"docker cheat sheet":{"index1":2,"stamp":0}},"summary_shortcut":{"document_summary":"Docker Cheat Sheet: This cheat sheet provides a quick reference for essential Docker and Docker Compose command-line operations. It covers core topics including container process management, image and repository handling, volume and port mapping, and system troubleshooting. Use this guide to quickly locate commands for building, running, and managing Dockerized environments.","root_node":{"index1":1,"stamp":0},"section_summaries":[{"depth":1,"node_id":{"index1":2,"stamp":0},"summary":"This cheat sheet provides a quick reference for essential Docker and Docker Compose command-line operations. It covers core topics including container process management, image and repository handling, volume and port mapping, and system troubleshooting. 
Use this guide to quickly locate commands for building, running, and managing Dockerized environments.","title":"Docker Cheat Sheet"}]},"topic_paths":{"active":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"additionally":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"advanced":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"alongside":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"analysis":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"applications":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"basic":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"brief":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"building":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"categories":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"cheat":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":0,"node_id":{"index1":1,"stamp":0},"weight":0.5714285969734192},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256}],"cli":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"command":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"commands":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":0.5},{"dep
th":2,"node_id":{"index1":7,"stamp":0},"weight":0.5},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.25},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.25},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.25}],"complete":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"compose":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":0.8571429252624512},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":0.4285714626312256},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256}],"comprehensive":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"consumption":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"container":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":0.6666666865348816},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":0.6666666865348816},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.6666666865348816},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.3333333432674408}],"containerized":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"containers":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"copying":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"core":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"covering":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"covers":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"data":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"description":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"destroy":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"details":[{"depth":2,"node_id":{"index1":6,"stamp":0}
,"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"docker":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":0.9000000357627869},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":0.699999988079071},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.6000000238418579},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":0.6000000238418579},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.6000000238418579},{"depth":0,"node_id":{"index1":1,"stamp":0},"weight":0.4000000059604645}],"dockerized":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"document":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"environments":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"essential":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"everything":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"executing":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"features":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"files":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"five":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"four":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"fundamental":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"guide":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"wei
ght":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"handling":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"health":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"host":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"image":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"images":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"including":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"inspecting":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"lifecycle":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"like":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"line":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"list":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"locate":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"log":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"logs":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"machines":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"main":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"manage":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"management":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.4285714626312256}],"managing":[{"depth
":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"mapping":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"metadata":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"monitor":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"monitoring":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"mounting":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"multi":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"necessary":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"networking":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"operations":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.5},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":0.5}],"orchestrating":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"orchestration":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"organized":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"outlines":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"overall":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"persistence":
[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"port":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"ports":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256}],"presented":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"process":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.4285714626312256}],"processes":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"provides":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"pushing":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"quick":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"quickly":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"reference":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"repository":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256}],"required":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"resource":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"resources":[{"depth":
2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"running":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"section":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"serves":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"sheet":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":0,"node_id":{"index1":1,"stamp":0},"weight":0.5714285969734192},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256}],"start":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"stop":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"storage":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"syntax":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"system":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"topics":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"tracking":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"troubleshooting":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":0.4285714626312256},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256}],"usage":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"use":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"utilities":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"viewing":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"volume":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"in
dex1":2,"stamp":0},"weight":1.0}],"volumes":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256}],"well":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"within":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}]}},"tree":{"arena":{"first_free_slot":null,"last_free_slot":null,"nodes":[{"data":{"Data":{"content":"","depth":0,"end_index":1,"end_page":1,"node_id":"0001","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"","summary":"","title":"Docker_Cheat_Sheet","token_count":null}},"first_child":{"index1":2,"stamp":0},"last_child":{"index1":2,"stamp":0},"next_sibling":null,"parent":null,"previous_sibling":null,"stamp":0},{"data":{"Data":{"content":"Process Management\n\n# Show all running docker containers\ndocker ps\n\n# Show all docker containers\ndocker ps -a\n\n# Run a container\ndocker run :\n\n# Run a container and connect to it\ndocker run -it :\n\n# Run a container in the background\ndocker run -d :\n\n# Stop a container\ndocker stop \n\n# Kill a container\ndocker kill \n Images/Repository\n\n# List available local images\ndocker images\n\n# Search for docker images\ndocker search \n\n# Pull a docker image\ndocker pull \n\n# Build an image with a dockerfile\ndocker build -t : -f \n\n# Login to a remote repository\ndocker login \n\n# Push an image to your remotee repository\ndocker push :\n\n# Remove a local docker image\ndocker rmi :\n\n# Show metadata for an image\ndocker inspect \n\n# Remove all unused docker images\ndocker image prune\n\nVolumes & Ports\n\n# List volumes\ndocker volume ls\n\n# Create a volume\ndocker volume create \n\n# Delete a volume\ndocker volume rm \n\n# Show volume metadata\ndocker volume inspect \n\n# Delete all volumes not attached to a container\ndocker volume prune\n\n# Mount a local directory to your container\ndocker run -v : 
\n\n# Copy file or folder from a docker container to host machine\ndocker cp : \n\n# Copy file or folder from local machine onto a container\ndocker cp :\n\n# Map a local port to a docker instance\ndocker run -d -p 127.0.0.1:: \n\n# List the ports a docker container is running on\ndocker port \n Troubleshooting\n\n# Show the logs of a container\ndocker logs \n\n# Follow/tail the logs of a container\ndocker logs -f \n\n# Show timestamps on docker logs\ndocker logs -t \n\n# Show details/metadata of a container\ndocker inspect \n\n# Show a 'top' view of processes running on a container\ndocker top \n\n# Show a 'top' view of all docker containers\ndocker stats\n\n# Show any files that have changed since startup\ndocker diff \n\n# Connect to an already running container\ndocker attach \n\n# Execute a command on a container\ndocker exec -it /bin/bash\n\n# Show docker system wide information\ndocker system info\n\n# Show docker disk space used\ndocker system df\n\n \n\nDocker Compose\n\n# Start your docker-compose defined resources in detached mode\ndocker-compose up -d -f \n\n# Stop all docker-compose resources\ndocker-compose stop\n\n# Destroy all docker-compose resources\ndocker-compose down\n\n# Show docker-compose processes\ndocker-compose ps\n\n# Show docker-compose logs\ndocker-compose logs\n\n# Show docker-compose resource consumption\ndocker-compose top","depth":1,"end_index":1,"end_page":1,"node_id":"0002","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"1","summary":"This cheat sheet provides a quick reference for essential Docker and Docker Compose command-line operations. It covers core topics including container process management, image and repository handling, volume and port mapping, and system troubleshooting. 
Use this guide to quickly locate commands for building, running, and managing Dockerized environments.","title":"Docker Cheat Sheet","token_count":676}},"first_child":{"index1":3,"stamp":0},"last_child":{"index1":7,"stamp":0},"next_sibling":null,"parent":{"index1":1,"stamp":0},"previous_sibling":null,"stamp":0},{"data":{"Data":{"content":"# Show all running docker containers\ndocker ps\n\n# Show all docker containers\ndocker ps -a\n\n# Run a container\ndocker run :\n\n# Run a container and connect to it\ndocker run -it :\n\n# Run a container in the background\ndocker run -d :\n\n# Stop a container\ndocker stop \n\n# Kill a container\ndocker kill \n Images/Repository\n\n# List available local images\ndocker images\n\n# Search for docker images\ndocker search \n\n# Pull a docker image\ndocker pull \n\n# Build an image with a dockerfile\ndocker build -t : -f \n\n# Login to a remote repository\ndocker login \n\n# Push an image to your remotee repository\ndocker push :\n\n# Remove a local docker image\ndocker rmi :\n\n# Show metadata for an image\ndocker inspect \n\n# Remove all unused docker images\ndocker image prune\n\nVolumes & Ports\n\n# List volumes\ndocker volume ls\n\n# Create a volume\ndocker volume create \n\n# Delete a volume\ndocker volume rm \n\n# Show volume metadata\ndocker volume inspect \n\n# Delete all volumes not attached to a container\ndocker volume prune\n\n# Mount a local directory to your container\ndocker run -v : \n\n# Copy file or folder from a docker container to host machine\ndocker cp : \n\n# Copy file or folder from local machine onto a container\ndocker cp :\n\n# Map a local port to a docker instance\ndocker run -d -p 127.0.0.1:: \n\n# List the ports a docker container is running on\ndocker port \n Troubleshooting\n\n# Show the logs of a container\ndocker logs \n\n# Follow/tail the logs of a container\ndocker logs -f \n\n# Show timestamps on docker logs\ndocker logs -t \n\n# Show details/metadata of a container\ndocker inspect \n\n# Show 
a 'top' view of processes running on a container\ndocker top \n\n# Show a 'top' view of all docker containers\ndocker stats\n\n# Show any files that have changed since startup\ndocker diff \n\n# Connect to an already running container\ndocker attach \n\n# Execute a command on a container\ndocker exec -it /bin/bash\n\n# Show docker system wide information\ndocker system info\n\n# Show docker disk space used\ndocker system df\n\n \n\nDocker Compose\n\n# Start your docker-compose defined resources in detached mode\ndocker-compose up -d -f \n\n# Stop all docker-compose resources\ndocker-compose stop\n\n# Destroy all docker-compose resources\ndocker-compose down\n\n# Show docker-compose processes\ndocker-compose ps\n\n# Show docker-compose logs\ndocker-compose logs\n\n# Show docker-compose resource consumption\ndocker-compose top","depth":2,"end_index":1,"end_page":1,"node_id":"0003","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"1.1","summary":"This document provides a comprehensive cheat sheet of essential Docker commands organized into five main categories: container process management, image/repository operations, volumes and ports handling, troubleshooting, and Docker Compose. 
Each command is presented with a brief description and the necessary syntax, covering everything from basic container lifecycle operations to advanced features like volume mounting, port mapping, log analysis, and multi-container orchestration.","title":"Process Management","token_count":673}},"first_child":null,"last_child":null,"next_sibling":{"index1":4,"stamp":0},"parent":{"index1":2,"stamp":0},"previous_sibling":null,"stamp":0},{"data":{"Data":{"content":"# List available local images\ndocker images\n\n# Search for docker images\ndocker search \n\n# Pull a docker image\ndocker pull \n\n# Build an image with a dockerfile\ndocker build -t : -f \n\n# Login to a remote repository\ndocker login \n\n# Push an image to your remotee repository\ndocker push :\n\n# Remove a local docker image\ndocker rmi :\n\n# Show metadata for an image\ndocker inspect \n\n# Remove all unused docker images\ndocker image prune\n\nVolumes & Ports\n\n# List volumes\ndocker volume ls\n\n# Create a volume\ndocker volume create \n\n# Delete a volume\ndocker volume rm \n\n# Show volume metadata\ndocker volume inspect \n\n# Delete all volumes not attached to a container\ndocker volume prune\n\n# Mount a local directory to your container\ndocker run -v : \n\n# Copy file or folder from a docker container to host machine\ndocker cp : \n\n# Copy file or folder from local machine onto a container\ndocker cp :\n\n# Map a local port to a docker instance\ndocker run -d -p 127.0.0.1:: \n\n# List the ports a docker container is running on\ndocker port \n Troubleshooting\n\n# Show the logs of a container\ndocker logs \n\n# Follow/tail the logs of a container\ndocker logs -f \n\n# Show timestamps on docker logs\ndocker logs -t \n\n# Show details/metadata of a container\ndocker inspect \n\n# Show a 'top' view of processes running on a container\ndocker top \n\n# Show a 'top' view of all docker containers\ndocker stats\n\n# Show any files that have changed since startup\ndocker diff \n\n# Connect to an 
already running container\ndocker attach \n\n# Execute a command on a container\ndocker exec -it /bin/bash\n\n# Show docker system wide information\ndocker system info\n\n# Show docker disk space used\ndocker system df\n\n \n\nDocker Compose\n\n# Start your docker-compose defined resources in detached mode\ndocker-compose up -d -f \n\n# Stop all docker-compose resources\ndocker-compose stop\n\n# Destroy all docker-compose resources\ndocker-compose down\n\n# Show docker-compose processes\ndocker-compose ps\n\n# Show docker-compose logs\ndocker-compose logs\n\n# Show docker-compose resource consumption\ndocker-compose top","depth":2,"end_index":1,"end_page":1,"node_id":"0004","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"1.2","summary":"This document serves as a Docker command cheat sheet organized into four categories: Image/Repository management, Volumes & Ports, Troubleshooting, and Docker Compose. It provides essential CLI commands for the complete container lifecycle, from building and pushing images to managing storage, networking, and orchestrating multi-container applications.","title":"Images/Repository","token_count":578}},"first_child":null,"last_child":null,"next_sibling":{"index1":5,"stamp":0},"parent":{"index1":2,"stamp":0},"previous_sibling":{"index1":3,"stamp":0},"stamp":0},{"data":{"Data":{"content":"# List volumes\ndocker volume ls\n\n# Create a volume\ndocker volume create \n\n# Delete a volume\ndocker volume rm \n\n# Show volume metadata\ndocker volume inspect \n\n# Delete all volumes not attached to a container\ndocker volume prune\n\n# Mount a local directory to your container\ndocker run -v : \n\n# Copy file or folder from a docker container to host machine\ndocker cp : \n\n# Copy file or folder from local machine onto a container\ndocker cp :\n\n# Map a local port to a docker instance\ndocker run -d -p 127.0.0.1:: \n\n# List the ports a docker container is running on\ndocker port \n Troubleshooting\n\n# Show 
the logs of a container\ndocker logs \n\n# Follow/tail the logs of a container\ndocker logs -f \n\n# Show timestamps on docker logs\ndocker logs -t \n\n# Show details/metadata of a container\ndocker inspect \n\n# Show a 'top' view of processes running on a container\ndocker top \n\n# Show a 'top' view of all docker containers\ndocker stats\n\n# Show any files that have changed since startup\ndocker diff \n\n# Connect to an already running container\ndocker attach \n\n# Execute a command on a container\ndocker exec -it /bin/bash\n\n# Show docker system wide information\ndocker system info\n\n# Show docker disk space used\ndocker system df\n\n \n\nDocker Compose\n\n# Start your docker-compose defined resources in detached mode\ndocker-compose up -d -f \n\n# Stop all docker-compose resources\ndocker-compose stop\n\n# Destroy all docker-compose resources\ndocker-compose down\n\n# Show docker-compose processes\ndocker-compose ps\n\n# Show docker-compose logs\ndocker-compose logs\n\n# Show docker-compose resource consumption\ndocker-compose top","depth":2,"end_index":1,"end_page":1,"node_id":"0005","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"1.3","summary":"This section serves as a quick reference guide for essential Docker command-line operations. It details the necessary commands for managing data persistence through volumes, mapping ports, and copying files between host machines and containers. 
Additionally, it outlines troubleshooting utilities for monitoring container health and logs, alongside fundamental Docker Compose commands for orchestrating multi-container environments.","title":"Volumes & Ports","token_count":441}},"first_child":null,"last_child":null,"next_sibling":{"index1":6,"stamp":0},"parent":{"index1":2,"stamp":0},"previous_sibling":{"index1":4,"stamp":0},"stamp":0},{"data":{"Data":{"content":"# Show the logs of a container\ndocker logs \n\n# Follow/tail the logs of a container\ndocker logs -f \n\n# Show timestamps on docker logs\ndocker logs -t \n\n# Show details/metadata of a container\ndocker inspect \n\n# Show a 'top' view of processes running on a container\ndocker top \n\n# Show a 'top' view of all docker containers\ndocker stats\n\n# Show any files that have changed since startup\ndocker diff \n\n# Connect to an already running container\ndocker attach \n\n# Execute a command on a container\ndocker exec -it /bin/bash\n\n# Show docker system wide information\ndocker system info\n\n# Show docker disk space used\ndocker system df\n\n \n\nDocker Compose\n\n# Start your docker-compose defined resources in detached mode\ndocker-compose up -d -f \n\n# Stop all docker-compose resources\ndocker-compose stop\n\n# Destroy all docker-compose resources\ndocker-compose down\n\n# Show docker-compose processes\ndocker-compose ps\n\n# Show docker-compose logs\ndocker-compose logs\n\n# Show docker-compose resource consumption\ndocker-compose top","depth":2,"end_index":1,"end_page":1,"node_id":"0006","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"1.4","summary":"This section provides a reference list of essential Docker and Docker Compose CLI commands used for troubleshooting and managing container environments. It details commands for inspecting container metadata, viewing logs, tracking resource usage, and executing commands within running containers. 
Additionally, it outlines the basic lifecycle and monitoring commands required to manage Docker Compose applications.","title":"Troubleshooting","token_count":252}},"first_child":null,"last_child":null,"next_sibling":{"index1":7,"stamp":0},"parent":{"index1":2,"stamp":0},"previous_sibling":{"index1":5,"stamp":0},"stamp":0},{"data":{"Data":{"content":"# Start your docker-compose defined resources in detached mode\ndocker-compose up -d -f \n\n# Stop all docker-compose resources\ndocker-compose stop\n\n# Destroy all docker-compose resources\ndocker-compose down\n\n# Show docker-compose processes\ndocker-compose ps\n\n# Show docker-compose logs\ndocker-compose logs\n\n# Show docker-compose resource consumption\ndocker-compose top","depth":2,"end_index":1,"end_page":1,"node_id":"0007","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"1.5","summary":"This section provides a quick reference guide for essential Docker Compose commands used to manage containerized environments. It outlines the CLI commands necessary to start, stop, and destroy resources, as well as how to monitor their active processes, logs, and overall resource consumption.","title":"Docker Compose","token_count":79}},"first_child":null,"last_child":null,"next_sibling":null,"parent":{"index1":2,"stamp":0},"previous_sibling":{"index1":6,"stamp":0},"stamp":0}]},"root_id":{"index1":1,"stamp":0}}}} \ No newline at end of file