diff --git a/rust/examples/flow.rs b/rust/examples/flow.rs index ab936bb5..ff1b6ca7 100644 --- a/rust/examples/flow.rs +++ b/rust/examples/flow.rs @@ -20,17 +20,35 @@ use vectorless::client::{IndexContext, IndexOptions, QueryContext}; /// Sample markdown content for demonstration. const SAMPLE_MARKDOWN: &str = r#" -# Project Documentation - -This document describes the architecture and usage of the vectorless library. +# Vectorless Architecture Guide ## Overview -Vectorless is a document indexing and retrieval library that uses tree-based navigation instead of vector embeddings. +Vectorless is a reasoning-native document intelligence engine that transforms documents into hierarchical semantic trees. Unlike traditional RAG systems that rely on vector embeddings and similarity search, Vectorless uses LLM-powered tree navigation to retrieve relevant content through deep contextual understanding. + +The core idea is simple: structured documents already have inherent semantic relationships encoded in their headings, sections, and paragraphs. By preserving this structure as a navigable tree, an LLM can efficiently locate relevant information by following the document's own logical organization. + +## Architecture + +The system consists of three main components: an indexing pipeline, a storage layer, and a retrieval engine. The indexing pipeline parses documents into tree structures and generates summaries. The storage layer persists indexed documents to disk. The retrieval engine navigates the tree at query time using search algorithms guided by LLM decisions. + +### Indexing Pipeline + +The indexing pipeline processes documents through multiple stages: parsing, tree building, enhancement (LLM summary generation), and reasoning index construction. Each stage is independently configurable and can be enabled or disabled based on requirements. The pipeline supports incremental re-indexing with content fingerprinting to avoid redundant work when documents haven't changed. 
+ +### Retrieval Engine + +The retrieval engine supports multiple search strategies including greedy depth-first search, beam search, and MCTS. A Pilot component provides LLM-guided navigation at key decision points during tree traversal. The engine is budget-aware, tracking token usage and making cost-conscious decisions about when to invoke the LLM versus using cheaper heuristic scoring. + +## Performance + +Under typical workloads, indexing a 50-page document takes approximately 10-30 seconds depending on LLM response latency and the complexity of the document structure. Query latency ranges from 200ms for simple keyword-matched queries to 3-5 seconds for complex multi-hop reasoning queries that require multiple LLM calls during tree navigation. + +The system is designed for accuracy over speed. By leveraging document structure and LLM reasoning, it achieves higher retrieval quality than vector-based approaches on structured documents like technical reports, legal contracts, and research papers. 
"#; #[tokio::main] -async fn main() -> Result<(), Box> { +async fn main() -> vectorless::Result<()> { // Initialize tracing for debug output (set RUST_LOG=debug to see more) tracing_subscriber::fmt::init(); @@ -39,13 +57,14 @@ async fn main() -> Result<(), Box> { // Step 1: Create a Vectorless client println!("Step 1: Creating Vectorless client..."); - let client = EngineBuilder::new() - .with_workspace("./workspace") - .with_key("sk-...") + let engine = EngineBuilder::new() + .with_workspace("./worksspace_flow_example") + .with_key("sk...") .with_model("gpt-4o") + .with_endpoint("https://api") .build() .await - .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?; + .map_err(|e| vectorless::Error::Config(e.to_string()))?; println!(" - Client created successfully"); println!(); @@ -57,7 +76,7 @@ async fn main() -> Result<(), Box> { let md_path = temp_dir.path().join("sample.md"); tokio::fs::write(&md_path, SAMPLE_MARKDOWN).await?; - let index_result = client + let index_result = engine .index(IndexContext::from_path(&md_path).with_options(IndexOptions::new().with_summaries())) .await?; let doc_id = index_result.doc_id().unwrap().to_string(); @@ -68,7 +87,7 @@ async fn main() -> Result<(), Box> { // Step 3: List indexed documents println!("Step 3: Indexed documents:"); - for doc in client.list().await? { + for doc in engine.list().await? 
{ println!(" - {} ({})", doc.name, doc.id); } println!(); @@ -76,12 +95,12 @@ async fn main() -> Result<(), Box> { // Step 4: Query the document println!("Step 4: Querying the document..."); - let queries = vec!["What is this project about?"]; + let queries = vec!["What is the seconds for complex multi-hop?"]; for query in queries { println!(" Query: \"{}\"", query); - match client + match engine .query(QueryContext::new(query).with_doc_id(&doc_id)) .await { @@ -92,7 +111,7 @@ async fn main() -> Result<(), Box> { } else { println!(" - Found relevant content:"); let preview = if item.content.len() > 200 { - format!("{}...", &item.content[..200]) + format!("{}...", &item.content) } else { item.content.clone() }; @@ -114,8 +133,8 @@ async fn main() -> Result<(), Box> { // Step 5: Cleanup println!("Step 5: Cleanup..."); - client.remove(&doc_id).await?; - println!(" - Document removed"); + // engine.remove(&doc_id).await?; + // println!(" - Document removed"); println!("\n=== Example Complete ==="); Ok(()) diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs index 21f09c08..cb88a1dc 100644 --- a/rust/src/client/engine.rs +++ b/rust/src/client/engine.rs @@ -407,8 +407,8 @@ impl Engine { let mut failed = Vec::new(); for doc_id in doc_ids { - let tree = match self.get_structure(&doc_id).await { - Ok(t) => t, + let (tree, reasoning_index) = match self.get_structure(&doc_id).await { + Ok((t, ri)) => (t, ri), Err(e) => { tracing::warn!("Skipping document {}: {}", doc_id, e); failed.push(FailedItem::new(&doc_id, e.to_string())); @@ -416,7 +416,7 @@ impl Engine { } }; - match self.retriever.query(&tree, &ctx.query, &options).await { + match self.retriever.query_with_reasoning_index(&tree, &ctx.query, &options, reasoning_index).await { Ok(mut result) => { result.doc_id = doc_id; items.push(result); @@ -431,8 +431,9 @@ impl Engine { // If everything failed, return error if items.is_empty() && !failed.is_empty() { return Err(Error::Config(format!( - "Query failed for 
all {} document(s)", - failed.len() + "Query failed for all {} document(s): {}", + failed.len(), + failed.iter().map(|f| format!("{} ({})", f.source, f.error)).collect::<Vec<_>>().join("; ") ))); } @@ -455,7 +456,7 @@ impl Engine { } }; - let tree = self.get_structure(&doc_id).await?; + let (tree, _reasoning_index) = self.get_structure(&doc_id).await?; let options = ctx.to_retrieve_options(&self.config); let rx = self @@ -529,8 +530,8 @@ impl Engine { // Internal // ============================================================ - /// Get document structure (tree). Internal use only. - pub(crate) async fn get_structure(&self, doc_id: &str) -> Result<DocumentTree> { + /// Get document structure (tree) and optional reasoning index. Internal use only. + pub(crate) async fn get_structure(&self, doc_id: &str) -> Result<(DocumentTree, Option<ReasoningIndex>)> { let workspace = self .workspace .as_ref() @@ -541,7 +542,7 @@ .await? .ok_or_else(|| Error::DocumentNotFound(format!("Document not found: {}", doc_id)))?; - Ok(doc.tree) + Ok((doc.tree, doc.reasoning_index)) } /// Resolve QueryScope into a list of document IDs. diff --git a/rust/src/client/retriever.rs b/rust/src/client/retriever.rs index a5b8676e..ad0638c6 100644 --- a/rust/src/client/retriever.rs +++ b/rust/src/client/retriever.rs @@ -24,11 +24,11 @@ use tracing::info; use super::events::{EventEmitter, QueryEvent}; use super::types::QueryResultItem; use crate::config::Config; -use crate::document::{DocumentTree, NodeId}; +use crate::document::{DocumentTree, NodeId, ReasoningIndex}; use crate::error::{Error, Result}; use crate::retrieval::content::ContentAggregatorConfig; use crate::retrieval::stream::RetrieveEventReceiver; -use crate::retrieval::{RetrievalResult, RetrieveOptions, RetrieveResponse, Retriever}; +use crate::retrieval::{RetrievalResult, RetrieveOptions, RetrieveResponse}; /// Document retrieval client. 
/// @@ -124,6 +124,21 @@ impl RetrieverClient { tree: &DocumentTree, question: &str, options: &RetrieveOptions, + ) -> Result { + self.query_with_reasoning_index(tree, question, options, None).await + } + + /// Query a document tree with optional reasoning index for fast-path lookup. + /// + /// # Errors + /// + /// Returns an error if the retrieval pipeline fails. + pub async fn query_with_reasoning_index( + &self, + tree: &DocumentTree, + question: &str, + options: &RetrieveOptions, + reasoning_index: Option, ) -> Result { self.events.emit_query(QueryEvent::Started { query: question.to_string(), @@ -131,10 +146,10 @@ impl RetrieverClient { info!("Querying: {:?}", question); - // Execute retrieval + // Execute retrieval with reasoning index let response = self .retriever - .retrieve(tree, question, options) + .retrieve_with_reasoning_index(tree, question, options, reasoning_index) .await .map_err(|e| Error::Retrieval(e.to_string()))?; diff --git a/rust/src/retrieval/mod.rs b/rust/src/retrieval/mod.rs index 35b4508d..982906c2 100644 --- a/rust/src/retrieval/mod.rs +++ b/rust/src/retrieval/mod.rs @@ -67,7 +67,7 @@ pub mod sufficiency; pub use context::{PruningStrategy, TokenEstimation}; pub use pipeline_retriever::PipelineRetriever; -pub use retriever::{RetrievalContext, Retriever}; +pub use retriever::RetrievalContext; pub use types::*; // Sufficiency exports diff --git a/rust/src/retrieval/pilot/llm_pilot.rs b/rust/src/retrieval/pilot/llm_pilot.rs index 2e96120f..a7bb62a9 100644 --- a/rust/src/retrieval/pilot/llm_pilot.rs +++ b/rust/src/retrieval/pilot/llm_pilot.rs @@ -10,7 +10,7 @@ use async_trait::async_trait; use std::sync::Arc; use tracing::{debug, info, warn}; -use crate::document::DocumentTree; +use crate::document::{DocumentTree, NodeId}; use crate::llm::{LlmClient, LlmExecutor}; use crate::memo::{MemoKey, MemoStore, MemoValue}; use crate::utils::fingerprint::Fingerprint; @@ -631,8 +631,16 @@ impl Pilot for LlmPilot { decision } - async fn 
guide_start(&self, tree: &DocumentTree, query: &str) -> Option { - println!("[DEBUG] LlmPilot::guide_start() called, query='{}'", query); + async fn guide_start( + &self, + tree: &DocumentTree, + query: &str, + start_node: NodeId, + ) -> Option { + println!( + "[DEBUG] LlmPilot::guide_start() called, query='{}', start_node={:?}", + query, start_node + ); // Check if guide_at_start is enabled if !self.config.guide_at_start { @@ -650,10 +658,14 @@ impl Pilot for LlmPilot { // Build start context let context = self.context_builder.build_start_context(tree, query); - // Get root's children as candidates - let node_ids = tree.children(tree.root()); + // Get start_node's children as candidates (NOT root's children) + let node_ids = tree.children(start_node); + if node_ids.is_empty() { + debug!("Start node has no children, no guidance needed"); + return None; + } println!( - "[DEBUG] LlmPilot::guide_start() - {} root children candidates", + "[DEBUG] LlmPilot::guide_start() - {} children candidates from start_node", node_ids.len() ); diff --git a/rust/src/retrieval/pilot/noop.rs b/rust/src/retrieval/pilot/noop.rs index ffedf5b8..fa2fba39 100644 --- a/rust/src/retrieval/pilot/noop.rs +++ b/rust/src/retrieval/pilot/noop.rs @@ -9,7 +9,7 @@ use async_trait::async_trait; -use crate::document::DocumentTree; +use crate::document::{DocumentTree, NodeId}; use super::{InterventionPoint, Pilot, PilotConfig, PilotDecision, SearchState}; @@ -69,7 +69,12 @@ impl Pilot for NoopPilot { } } - async fn guide_start(&self, _tree: &DocumentTree, _query: &str) -> Option { + async fn guide_start( + &self, + _tree: &DocumentTree, + _query: &str, + _start_node: NodeId, + ) -> Option { // No guidance at start None } @@ -138,7 +143,7 @@ mod tests { let pilot = NoopPilot::new(); let tree = DocumentTree::new("test", "test content"); - let guidance = pilot.guide_start(&tree, "test").await; + let guidance = pilot.guide_start(&tree, "test", tree.root()).await; assert!(guidance.is_none()); } diff --git 
a/rust/src/retrieval/pilot/trait.rs b/rust/src/retrieval/pilot/trait.rs index 5873d2b0..54936b9a 100644 --- a/rust/src/retrieval/pilot/trait.rs +++ b/rust/src/retrieval/pilot/trait.rs @@ -167,8 +167,16 @@ pub trait Pilot: Send + Sync { /// Called once at the beginning of search to help determine /// the starting point and initial direction. /// + /// `start_node` is the node from which the search begins. The pilot + /// should evaluate that node's children (not root's children) as candidates. + /// /// Returns `None` if no guidance is available or needed. - async fn guide_start(&self, tree: &DocumentTree, query: &str) -> Option; + async fn guide_start( + &self, + tree: &DocumentTree, + query: &str, + start_node: NodeId, + ) -> Option; /// Provide guidance during backtracking. /// diff --git a/rust/src/retrieval/pipeline/context.rs b/rust/src/retrieval/pipeline/context.rs index 7990ce92..b2a2745a 100644 --- a/rust/src/retrieval/pipeline/context.rs +++ b/rust/src/retrieval/pipeline/context.rs @@ -256,6 +256,9 @@ pub struct PipelineContext { pub accumulated_content: String, /// Estimated token count. pub token_count: usize, + /// Fingerprint of candidate node IDs from previous evaluate call. + /// Used to detect stagnant loops (same candidates → same evaluation). + pub prev_candidate_fingerprint: Option, // ============ Final Result ============ /// Final retrieval response. @@ -307,6 +310,7 @@ impl PipelineContext { sufficiency: SufficiencyLevel::default(), accumulated_content: String::new(), token_count: 0, + prev_candidate_fingerprint: None, result: None, stage_results: HashMap::new(), metrics: RetrievalMetrics::default(), @@ -402,6 +406,25 @@ impl PipelineContext { self.metrics.backtracks += 1; } + /// Compute a fingerprint of the current candidate node IDs. 
+ fn candidate_fingerprint(&self) -> u64 { + use std::hash::{Hash, Hasher}; + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + for c in &self.candidates { + format!("{:?}", c.node_id).hash(&mut hasher); + } + hasher.finish() + } + + /// Check if candidates changed since the last call, and update the stored fingerprint. + /// Returns `true` if candidates are the same as before (stagnant loop detected). + pub fn check_candidates_stagnant(&mut self) -> bool { + let fp = self.candidate_fingerprint(); + let stagnant = self.prev_candidate_fingerprint == Some(fp); + self.prev_candidate_fingerprint = Some(fp); + stagnant + } + /// Check if token limit is reached. pub fn is_token_limit_reached(&self) -> bool { self.token_count >= self.options.max_tokens diff --git a/rust/src/retrieval/pipeline_retriever.rs b/rust/src/retrieval/pipeline_retriever.rs index 2b5c25ec..9f135cf1 100644 --- a/rust/src/retrieval/pipeline_retriever.rs +++ b/rust/src/retrieval/pipeline_retriever.rs @@ -15,7 +15,7 @@ use super::retriever::{CostEstimate, Retriever, RetrieverError, RetrieverResult} use super::stages::{AnalyzeStage, EvaluateStage, PlanStage, SearchStage}; use super::stream::RetrieveEventReceiver; use super::types::{RetrieveOptions, RetrieveResponse}; -use crate::document::DocumentTree; +use crate::document::{DocumentTree, ReasoningIndex}; use crate::llm::LlmClient; use crate::memo::MemoStore; use crate::retrieval::pilot::{LlmPilot, PilotConfig}; @@ -151,6 +151,30 @@ impl PipelineRetriever { options.clone() } + /// Retrieve with optional reasoning index for fast-path lookup. 
+ pub async fn retrieve_with_reasoning_index( + &self, + tree: &DocumentTree, + query: &str, + options: &RetrieveOptions, + reasoning_index: Option, + ) -> RetrieverResult { + let mut orchestrator = self.build_orchestrator(); + let tree_arc = Arc::new(tree.clone()); + + let response = orchestrator + .execute_with_reasoning_index( + tree_arc, + query, + self.options_to_retrieve_options(options), + reasoning_index, + ) + .await + .map_err(|e| RetrieverError::Internal(e.to_string()))?; + + Ok(response) + } + /// Execute streaming retrieval. /// /// Returns a channel receiver that yields [`RetrieveEvent`]s as the diff --git a/rust/src/retrieval/search/beam.rs b/rust/src/retrieval/search/beam.rs index f285cb30..73bf0cc1 100644 --- a/rust/src/retrieval/search/beam.rs +++ b/rust/src/retrieval/search/beam.rs @@ -137,7 +137,8 @@ impl BeamSearch { let start_children = tree.children(start_node); debug!("Start node has {} children", start_children.len()); - // Check if Pilot wants to guide the start + // Check if Pilot wants to guide the start. + // Pass start_node so the pilot evaluates the correct children. let initial_candidates = if let Some(p) = pilot { debug!( "BeamSearch: Pilot is available, name={}, guide_at_start={}", @@ -145,7 +146,7 @@ impl BeamSearch { p.config().guide_at_start ); if p.config().guide_at_start { - if let Some(guidance) = p.guide_start(tree, &context.query).await { + if let Some(guidance) = p.guide_start(tree, &context.query, start_node).await { debug!( "Pilot provided start guidance with confidence {}", guidance.confidence diff --git a/rust/src/retrieval/search/toc_navigator.rs b/rust/src/retrieval/search/toc_navigator.rs index 6e56a406..ae156a21 100644 --- a/rust/src/retrieval/search/toc_navigator.rs +++ b/rust/src/retrieval/search/toc_navigator.rs @@ -79,6 +79,8 @@ impl ToCNavigator { /// Phase A: Score top-level nodes with BM25 and keep the top-N. 
/// Phase B: If the best BM25 score is below `llm_threshold` and an LLM /// client is available, ask the LLM to refine the selection. + /// Phase C: If BM25 produced no results and LLM is unavailable, fall back + /// to keyword-overlap matching against section summaries. pub async fn locate( &self, query: &str, @@ -120,17 +122,133 @@ impl ToCNavigator { } } - // Fallback: if no branches passed the filter, search from root - if top_branches.is_empty() { - debug!("ToCNavigator: no branches above threshold, falling back to root"); - return vec![SearchCue { - root: tree.root(), - confidence: 0.5, - }]; + if !top_branches.is_empty() { + // Return BM25 results as cues + return top_branches + .into_iter() + .map(|(node_id, score)| SearchCue { + root: node_id, + confidence: score, + }) + .collect(); + } + + // Phase C: BM25 produced nothing — try keyword overlap on summaries. + // This handles abstract queries like "What is this project about?" + // where the query keywords don't appear in section titles but the + // summaries contain relevant semantic matches. + let summary_cues = self.match_by_summary(query, tree, top_level_nodes); + if !summary_cues.is_empty() { + return summary_cues; + } + + // Final fallback: search from root + debug!("ToCNavigator: no branches above threshold, falling back to root"); + vec![SearchCue { + root: tree.root(), + confidence: 0.5, + }] + } + + /// Match query against section summaries using keyword overlap. + /// + /// This is a lightweight fallback for abstract queries where BM25 + /// fails because query terms don't appear verbatim in section titles + /// or short content snippets. + /// + /// For overview-style queries (e.g. "What is this project about?"), + /// if no keywords match any section, returns all top-level sections + /// with the overview/introduction section boosted. 
+ fn match_by_summary( + &self, + query: &str, + tree: &DocumentTree, + top_level_nodes: &[NodeId], + ) -> Vec { + let query_lower = query.to_lowercase(); + let query_words: Vec<&str> = query_lower + .split_whitespace() + .filter(|w| w.len() > 2) + .collect(); + + let is_overview = Self::is_overview_query(query); + + if query_words.is_empty() && !is_overview { + return Vec::new(); + } + + let mut scored: Vec<(NodeId, f32)> = Vec::new(); + + for &node_id in top_level_nodes { + if let Some(node) = tree.get(node_id) { + let text = format!("{} {} {}", node.title, node.summary, node.content) + .to_lowercase(); + + let match_count = query_words + .iter() + .filter(|w| text.contains(*w)) + .count(); + + let mut score = if query_words.is_empty() { + 0.0 + } else { + match_count as f32 / query_words.len() as f32 + }; + + // For overview queries, also check if the section title/summary + // contains overview-like terms + if is_overview { + let title_lower = node.title.to_lowercase(); + let summary_lower = node.summary.to_lowercase(); + let looks_like_overview = title_lower.contains("overview") + || title_lower.contains("introduction") + || title_lower.contains("summary") + || title_lower.contains("简介") + || title_lower.contains("概述") + || summary_lower.contains("overview") + || summary_lower.contains("introduction"); + + if looks_like_overview { + score = (score + 0.5).min(1.0); + } + } + + if score > 0.1 { + scored.push((node_id, score)); + } + } + } + + // For overview queries with no matches at all, return the first + // section as a reasonable default (it's usually the introduction). 
+ if scored.is_empty() && is_overview { + if let Some(&first_id) = top_level_nodes.first() { + info!( + "ToCNavigator: overview query with no keyword matches, using first section as default" + ); + return vec![SearchCue { + root: first_id, + confidence: 0.6, + }]; + } + return Vec::new(); + } + + scored.sort_by(|a, b| { + b.1.partial_cmp(&a.1) + .unwrap_or(std::cmp::Ordering::Equal) + }); + scored.truncate(self.max_branches); + + if !scored.is_empty() { + info!( + "ToCNavigator summary match: {} cues from {} nodes", + scored.len(), + top_level_nodes.len() + ); } - // Return BM25 results as cues - top_branches + scored .into_iter() .map(|(node_id, score)| SearchCue { root: node_id, @@ -139,6 +257,26 @@ impl ToCNavigator { .collect() } + /// Check if a query is asking for a general overview or summary of a document. + fn is_overview_query(query: &str) -> bool { + let lower = query.to_lowercase(); + + let patterns = [ + "about", + "overview", + "summary", + "introduction", + "describe", + "what is this", + "tell me about", + "main idea", + "key points", + "purpose", + ]; + + patterns.iter().any(|p| lower.contains(p)) + } + /// Phase B: Ask the LLM to pick the most relevant subtrees. 
/// /// Presents the full top-level TOC to the LLM and lets it select the @@ -229,11 +367,15 @@ Rules: } if cues.is_empty() { - warn!("LLM refinement returned no valid candidates, falling back to BM25"); - return vec![SearchCue { - root: tree.root(), - confidence: 0.5, - }]; + warn!("LLM refinement returned no valid candidates, falling back to summary matching"); + let summary_cues = self.match_by_summary(query, tree, top_level_nodes); + if summary_cues.is_empty() { + return vec![SearchCue { + root: tree.root(), + confidence: 0.5, + }]; + } + return summary_cues; } info!( @@ -244,11 +386,17 @@ Rules: cues } Err(e) => { - warn!("LLM refinement failed: {}, falling back to root", e); - vec![SearchCue { - root: tree.root(), - confidence: 0.5, - }] + warn!("LLM refinement failed: {}, falling back to summary matching", e); + // Don't fall directly to root — try summary matching first + let summary_cues = self.match_by_summary(query, tree, top_level_nodes); + if summary_cues.is_empty() { + vec![SearchCue { + root: tree.root(), + confidence: 0.5, + }] + } else { + summary_cues + } } } } diff --git a/rust/src/retrieval/stages/evaluate.rs b/rust/src/retrieval/stages/evaluate.rs index 32f9945c..c9f05800 100644 --- a/rust/src/retrieval/stages/evaluate.rs +++ b/rust/src/retrieval/stages/evaluate.rs @@ -168,11 +168,16 @@ impl EvaluateStage { } /// Collect content from leaf descendants of a node (excluding the node itself). + /// + /// Uses BFS (FIFO) traversal to preserve document order — the first + /// section in the document appears first in the output. 
fn collect_leaf_content( &self, tree: &crate::document::DocumentTree, node_id: crate::document::NodeId, ) -> String { + use std::collections::VecDeque; + let mut content_parts = Vec::new(); // Start with children, not the node itself @@ -182,9 +187,9 @@ return String::new(); } - let mut stack: Vec<crate::document::NodeId> = children; + let mut queue: VecDeque<crate::document::NodeId> = children.into_iter().collect(); - while let Some(current_id) = stack.pop() { + while let Some(current_id) = queue.pop_front() { let current_children = tree.children(current_id); if current_children.is_empty() { @@ -195,8 +200,8 @@ } } } else { - // Non-leaf node - add children to stack - stack.extend(current_children); + // Non-leaf node - add children to queue (FIFO preserves order) + queue.extend(current_children); } } @@ -285,7 +290,6 @@ /// Calculate overall confidence score. fn calculate_confidence(&self, ctx: &PipelineContext) -> f32 { if ctx.candidates.is_empty() { - println!("[DEBUG] calculate_confidence: no candidates, returning 0.0"); return 0.0; } @@ -299,12 +303,7 @@ SufficiencyLevel::Insufficient => 0.4, }; - let confidence = avg_score * sufficiency_factor; - println!( - "[DEBUG] calculate_confidence: avg_score={:.3}, sufficiency={:?}, factor={:.1}, confidence={:.3}", - avg_score, ctx.sufficiency, sufficiency_factor, confidence - ); - confidence + avg_score * sufficiency_factor } } @@ -333,12 +332,6 @@ impl RetrievalStage for EvaluateStage { async fn execute(&self, ctx: &mut PipelineContext) -> crate::error::Result<StageOutcome> { let start = std::time::Instant::now(); - println!( - "[DEBUG] EvaluateStage: {} candidates, iteration {}", - ctx.candidates.len(), - ctx.search_iterations - ); - info!( "Judging sufficiency: {} candidates, iteration {}", ctx.candidates.len(), @@ -359,6 +352,26 @@ ctx.sufficiency = self.check_sufficiency(ctx); info!("Sufficiency level: {:?}", ctx.sufficiency); + // 3.5 Detect stagnant 
candidates (same results as previous iteration) + // If candidates haven't changed, further backtracking won't help. + let stagnant = ctx.check_candidates_stagnant(); + if stagnant { + info!( + "Candidates unchanged after backtrack, completing with {} candidates", + ctx.candidates.len() + ); + ctx.result = Some(self.build_response(ctx)); + ctx.record_reasoning( + StageName::Evaluate, + format!( + "Candidates stagnant (unchanged), forced completion with {} candidates", + ctx.candidates.len() + ), + NavigationDecision::Skip, + ); + return Ok(StageOutcome::complete()); + } + // Update metrics ctx.metrics.evaluate_time_ms += start.elapsed().as_millis() as u64; ctx.metrics.tokens_used = tokens; diff --git a/rust/src/retrieval/stages/search.rs b/rust/src/retrieval/stages/search.rs index ad522634..31e6ae9a 100644 --- a/rust/src/retrieval/stages/search.rs +++ b/rust/src/retrieval/stages/search.rs @@ -326,9 +326,9 @@ impl SearchStage { /// Check if a query is asking for a document summary/overview. fn is_summary_query(query: &str) -> bool { let lower = query.to_lowercase(); + + // Direct keyword matches let patterns = [ - "what is this document", - "what is this about", "summarize", "summary", "overview", @@ -344,7 +344,30 @@ impl SearchStage { "文档简介", "介绍一下", ]; - patterns.iter().any(|p| lower.contains(p)) + if patterns.iter().any(|p| lower.contains(p)) { + return true; + } + + // Phrase patterns — match with intervening words removed. 
+ // "what is this project about" → remove common filler words, check for "what is this about" + let filler_words = ["project", "document", "file", "paper", "article", "text", "book", "the", "a", "an"]; + let cleaned: String = lower + .split_whitespace() + .filter(|w| !filler_words.contains(w)) + .collect::<Vec<&str>>() + .join(" "); + + let phrase_patterns = [ + "what is this about", + "what is this document", + "what is it about", + "what does this mean", + "tell me about this", + "what is the main idea", + "what are the key points", + "what is the purpose", + ]; + phrase_patterns.iter().any(|p| cleaned.contains(p)) } /// Try to match the query against pre-computed reasoning index entries. @@ -357,21 +380,33 @@ // Check 1: Summary shortcut — handle "overview" style queries if let Some(ref shortcut) = ridx.summary_shortcut() { if Self::is_summary_query(&ctx.query) { - let mut candidates = vec![CandidateNode::new( + // For summary queries, return all top-level sections as candidates. + // Don't include the root node itself — it has no direct content, + // only descendant leaf content which is already covered by sections. 
+ let candidates: Vec = shortcut + .section_summaries + .iter() + .map(|section| { + CandidateNode::new( + section.node_id, + 1.0, + section.depth, + ctx.tree.is_leaf(section.node_id), + ) + }) + .collect(); + + if !candidates.is_empty() { + return Some(candidates); + } + + // Fallback: if no sections, use root node + return Some(vec![CandidateNode::new( shortcut.root_node, 1.0, 0, ctx.tree.is_leaf(shortcut.root_node), - )]; - for section in &shortcut.section_summaries { - candidates.push(CandidateNode::new( - section.node_id, - 0.9, - section.depth, - ctx.tree.is_leaf(section.node_id), - )); - } - return Some(candidates); + )]); } } diff --git a/rust/src/storage/persistence.rs b/rust/src/storage/persistence.rs index 7dd8cbcc..b2be0030 100644 --- a/rust/src/storage/persistence.rs +++ b/rust/src/storage/persistence.rs @@ -250,13 +250,13 @@ pub struct PageContent { /// Wrapper for persisted data with checksum. #[derive(Debug, Serialize, Deserialize)] -struct PersistedWrapper { +struct PersistedWrapper { /// Format version. version: u32, /// SHA-256 checksum of the payload. checksum: String, - /// The actual data. - payload: T, + /// The actual data as raw JSON value (avoids re-serialization drift). + payload: serde_json::Value, } /// Options for save/load operations. 
@@ -330,17 +330,20 @@ pub fn save_document_with_options( doc: &PersistedDocument, options: &PersistenceOptions, ) -> Result<()> { - // Serialize the payload first - let payload_bytes = serde_json::to_vec(doc).map_err(|e| Error::Serialization(e.to_string()))?; + // Serialize to serde_json::Value first (avoids HashMap key ordering drift) + let payload_value = + serde_json::to_value(doc).map_err(|e| Error::Serialization(e.to_string()))?; - // Calculate checksum + // Calculate checksum on the Value's canonical bytes + let payload_bytes = + serde_json::to_vec(&payload_value).map_err(|e| Error::Serialization(e.to_string()))?; let checksum = calculate_checksum(&payload_bytes); // Create wrapper let wrapper = PersistedWrapper { version: FORMAT_VERSION, checksum, - payload: doc.clone(), + payload: payload_value, }; // Serialize wrapper @@ -407,8 +410,8 @@ pub fn load_document_with_options( let file = File::open(path).map_err(Error::Io)?; let reader = BufReader::new(file); - // Parse wrapper - let wrapper: PersistedWrapper = serde_json::from_reader(reader) + // Parse wrapper (payload is serde_json::Value) + let wrapper: PersistedWrapper = serde_json::from_reader(reader) .map_err(|e| Error::Parse(format!("Failed to parse document: {}", e)))?; // Check version @@ -434,7 +437,11 @@ pub fn load_document_with_options( } } - Ok(wrapper.payload) + // Deserialize Value to target type + let doc: PersistedDocument = serde_json::from_value(wrapper.payload) + .map_err(|e| Error::Parse(format!("Failed to deserialize document: {}", e)))?; + + Ok(doc) } /// Save the workspace index (metadata for all documents). 
@@ -448,16 +455,19 @@ pub fn save_index_with_options(
     entries: &[DocumentMeta],
     options: &PersistenceOptions,
 ) -> Result<()> {
-    // Serialize payload
+    // Serialize to serde_json::Value first
+    let payload_value =
+        serde_json::to_value(entries).map_err(|e| Error::Serialization(e.to_string()))?;
+
     let payload_bytes =
-        serde_json::to_vec(entries).map_err(|e| Error::Serialization(e.to_string()))?;
+        serde_json::to_vec(&payload_value).map_err(|e| Error::Serialization(e.to_string()))?;
     let checksum = calculate_checksum(&payload_bytes);
 
     let wrapper = PersistedWrapper {
         version: FORMAT_VERSION,
         checksum,
-        payload: entries.to_vec(),
+        payload: payload_value,
     };
 
     let json =
@@ -505,7 +515,7 @@ pub fn load_index_with_options(
     let file = File::open(path).map_err(Error::Io)?;
     let reader = BufReader::new(file);
 
-    let wrapper: PersistedWrapper<Vec<DocumentMeta>> = serde_json::from_reader(reader)
+    let wrapper: PersistedWrapper = serde_json::from_reader(reader)
         .map_err(|e| Error::Parse(format!("Failed to parse index: {}", e)))?;
 
     // Check version
@@ -531,7 +541,11 @@ pub fn load_index_with_options(
         }
     }
 
-    Ok(wrapper.payload)
+    // Deserialize Value to target type
+    let entries: Vec<DocumentMeta> = serde_json::from_value(wrapper.payload)
+        .map_err(|e| Error::Parse(format!("Failed to deserialize index: {}", e)))?;
+
+    Ok(entries)
 }
 
 // ============================================================================
@@ -542,17 +556,20 @@ pub fn load_index_with_options(
 ///
 /// This is useful for storage backends that work with byte arrays.
 pub fn save_document_to_bytes(doc: &PersistedDocument) -> Result<Vec<u8>> {
-    // Serialize the payload first
-    let payload_bytes = serde_json::to_vec(doc).map_err(|e| Error::Serialization(e.to_string()))?;
+    // Serialize to serde_json::Value first
+    let payload_value =
+        serde_json::to_value(doc).map_err(|e| Error::Serialization(e.to_string()))?;
 
-    // Calculate checksum
+    // Calculate checksum on the Value's canonical bytes
+    let payload_bytes =
+        serde_json::to_vec(&payload_value).map_err(|e| Error::Serialization(e.to_string()))?;
     let checksum = calculate_checksum(&payload_bytes);
 
     // Create wrapper
     let wrapper = PersistedWrapper {
         version: FORMAT_VERSION,
         checksum,
-        payload: doc.clone(),
+        payload: payload_value,
     };
 
     // Serialize wrapper
@@ -571,8 +588,8 @@ pub fn load_document_from_bytes_with_options(
     data: &[u8],
     verify_checksum: bool,
 ) -> Result<PersistedDocument> {
-    // Parse wrapper
-    let wrapper: PersistedWrapper<PersistedDocument> = serde_json::from_slice(data)
+    // Parse wrapper (payload is serde_json::Value)
+    let wrapper: PersistedWrapper = serde_json::from_slice(data)
         .map_err(|e| Error::Parse(format!("Failed to parse document: {}", e)))?;
 
     // Check version
@@ -598,20 +615,26 @@ pub fn load_document_from_bytes_with_options(
         }
     }
 
-    Ok(wrapper.payload)
+    // Deserialize Value to target type
+    let doc: PersistedDocument = serde_json::from_value(wrapper.payload)
+        .map_err(|e| Error::Parse(format!("Failed to deserialize document: {}", e)))?;
+
+    Ok(doc)
 }
 
 /// Serialize an index to bytes.
 pub fn save_index_to_bytes(entries: &[DocumentMeta]) -> Result<Vec<u8>> {
-    let payload_bytes =
-        serde_json::to_vec(entries).map_err(|e| Error::Serialization(e.to_string()))?;
+    let payload_value =
+        serde_json::to_value(entries).map_err(|e| Error::Serialization(e.to_string()))?;
+    let payload_bytes =
+        serde_json::to_vec(&payload_value).map_err(|e| Error::Serialization(e.to_string()))?;
     let checksum = calculate_checksum(&payload_bytes);
 
     let wrapper = PersistedWrapper {
         version: FORMAT_VERSION,
         checksum,
-        payload: entries.to_vec(),
+        payload: payload_value,
     };
 
     serde_json::to_vec(&wrapper).map_err(|e| Error::Serialization(e.to_string()))
@@ -627,7 +650,7 @@ pub fn load_index_from_bytes_with_options(
     data: &[u8],
     verify_checksum: bool,
 ) -> Result<Vec<DocumentMeta>> {
-    let wrapper: PersistedWrapper<Vec<DocumentMeta>> = serde_json::from_slice(data)
+    let wrapper: PersistedWrapper = serde_json::from_slice(data)
         .map_err(|e| Error::Parse(format!("Failed to parse index: {}", e)))?;
 
     // Check version
@@ -653,7 +676,11 @@ pub fn load_index_from_bytes_with_options(
         }
     }
 
-    Ok(wrapper.payload)
+    // Deserialize Value to target type
+    let entries: Vec<DocumentMeta> = serde_json::from_value(wrapper.payload)
+        .map_err(|e| Error::Parse(format!("Failed to deserialize index: {}", e)))?;
+
+    Ok(entries)
 }
 
 #[cfg(test)]
@@ -734,8 +761,9 @@ mod tests {
         // Now corrupt the checksum field specifically
         let content = std::fs::read_to_string(&path).unwrap();
         // Change the checksum value but keep the payload intact
+        let payload_value = serde_json::to_value(&doc).unwrap();
         let corrupted = content.replace(
-            &calculate_checksum(&serde_json::to_vec(&doc).unwrap()),
+            &calculate_checksum(&serde_json::to_vec(&payload_value).unwrap()),
             "0000000000000000000000000000000000000000000000000000000000000000",
        );
         std::fs::write(&path, corrupted).unwrap();