From 9ceacbf3141f65cc03170aa8e3f63691c5824c4c Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sun, 12 Apr 2026 09:10:24 +0800 Subject: [PATCH 1/5] feat(client): enhance error messages with detailed failure information - Add specific error details showing which sources failed and their respective error messages - Replace generic "All X source(s) failed to index" message with detailed breakdown - Include source name and error description in the error output for better debugging --- rust/src/client/engine.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs index 683ac782..21f09c08 100644 --- a/rust/src/client/engine.rs +++ b/rust/src/client/engine.rs @@ -164,8 +164,9 @@ impl Engine { .await; if items.is_empty() && !failed.is_empty() { return Err(Error::Config(format!( - "All {} source(s) failed to index", - failed.len() + "All {} source(s) failed to index: {}", + failed.len(), + failed.iter().map(|f| format!("{} ({})", f.source, f.error)).collect::<Vec<_>>().join("; ") ))); } if !items.is_empty() { @@ -207,8 +208,9 @@ impl Engine { if items.is_empty() && !failed.is_empty() { return Err(Error::Config(format!( - "All {} source(s) failed to index", - failed.len() + "All {} source(s) failed to index: {}", + failed.len(), + failed.iter().map(|f| format!("{} ({})", f.source, f.error)).collect::<Vec<_>>().join("; ") ))); } From d7e016c070f4c38fcc32ff0c95dce63286ea653d Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sun, 12 Apr 2026 09:21:32 +0800 Subject: [PATCH 2/5] feat(rust): add incremental and single document indexing examples - Add index_incremental.rs example demonstrating incremental indexing with change detection capabilities - Add index_single.rs example showing single document indexing workflow - Update existing indexing.rs example to focus on batch indexing with multiple document paths - Update example configurations to use google/gemini-3-flash-preview model and proper 
endpoint - Include comprehensive cleanup logic in all examples chore: update .gitignore with workspace patterns - Add workspace* pattern to ignore test workspace directories - Keep ENV/ directory in gitignore as was previously intended --- .gitignore | 5 +- rust/examples/index_incremental.rs | 96 ++++++++++++++++++++++++ rust/examples/index_single.rs | 72 ++++++++++++++++++ rust/examples/indexing.rs | 116 +++++++---------------------- 4 files changed, 198 insertions(+), 91 deletions(-) create mode 100644 rust/examples/index_incremental.rs create mode 100644 rust/examples/index_single.rs diff --git a/.gitignore b/.gitignore index 553e4b7c..a05dac13 100644 --- a/.gitignore +++ b/.gitignore @@ -83,4 +83,7 @@ wheels/ .ruff_cache/ .venv/ venv/ -ENV/ \ No newline at end of file +ENV/ + +# Test workspace +workspace* \ No newline at end of file diff --git a/rust/examples/index_incremental.rs b/rust/examples/index_incremental.rs new file mode 100644 index 00000000..6b710a93 --- /dev/null +++ b/rust/examples/index_incremental.rs @@ -0,0 +1,96 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Incremental indexing example — re-index with change detection. +//! +//! ```bash +//! cargo run --example index_incremental +//! ``` + +use vectorless::{DocumentFormat, EngineBuilder, IndexContext, IndexMode}; + +#[tokio::main] +async fn main() -> vectorless::Result<()> { + let engine = EngineBuilder::new() + .with_workspace("./workspace_incremental_example") + .with_key("sk-or-v1-...") + .with_model("google/gemini-3-flash-preview") + .with_endpoint("http://localhost:4000/api/v1") + .build() + .await + .map_err(|e| vectorless::Error::Config(e.to_string()))?; + + let content_v1 = r#"# API Reference + +## GET /users + +Returns a list of all users in the system. + +## POST /users + +Creates a new user account. +"#; + + let content_v2 = r#"# API Reference + +## GET /users + +Returns a paginated list of users. 
Supports `?page=` and `?limit=` parameters. + +## POST /users + +Creates a new user account. Requires email and password fields. + +## DELETE /users/:id + +Deletes a user by their unique identifier. +"#; + + // 1. Initial full index + println!("--- Initial index ---"); + let result = engine + .index(IndexContext::from_content(content_v1, DocumentFormat::Markdown)) + .await?; + + let doc_id = result.items[0].doc_id.clone(); + if let Some(m) = &result.items[0].metrics { + println!("indexed in {}ms, {} nodes", m.total_time_ms(), m.nodes_processed); + } + + // 2. Re-index unchanged content (incremental) — skips processing + println!("\n--- Re-index unchanged (incremental) ---"); + let result = engine + .index( + IndexContext::from_content(content_v1, DocumentFormat::Markdown) + .with_mode(IndexMode::Incremental), + ) + .await?; + + for item in &result.items { + println!("doc_id: {} (unchanged, skipped)", item.doc_id); + } + + // 3. Re-index with changes (incremental) — detects diff and updates + println!("\n--- Re-index with changes (incremental) ---"); + let result = engine + .index( + IndexContext::from_content(content_v2, DocumentFormat::Markdown) + .with_mode(IndexMode::Incremental), + ) + .await?; + + for item in &result.items { + if let Some(m) = &item.metrics { + println!("updated in {}ms, {} nodes", m.total_time_ms(), m.nodes_processed); + } + } + + println!("\ndoc_id: {doc_id}"); + + // Cleanup + for doc in engine.list().await? { + engine.remove(&doc.id).await?; + } + + Ok(()) +} diff --git a/rust/examples/index_single.rs b/rust/examples/index_single.rs new file mode 100644 index 00000000..aa396395 --- /dev/null +++ b/rust/examples/index_single.rs @@ -0,0 +1,72 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Single document indexing example — index one document from content. +//! +//! ```bash +//! cargo run --example index_single +//! 
``` + +use vectorless::{DocumentFormat, EngineBuilder, IndexContext}; + +#[tokio::main] +async fn main() -> vectorless::Result<()> { + let engine = EngineBuilder::new() + .with_workspace("./workspace_single_example") + .with_key("sk-or-v1-...") + .with_model("google/gemini-3-flash-preview") + .with_endpoint("http://localhost:4000/api/v1") + .build() + .await + .map_err(|e| vectorless::Error::Config(e.to_string()))?; + + let content = r#"# Project Overview + +## Introduction + +This document describes the architecture of a distributed system +designed for high-throughput data processing. + +## Components + +### API Gateway + +Handles authentication, rate limiting, and request routing. +Supports both REST and gRPC protocols. + +### Worker Pool + +Processes tasks from the message queue. Each worker handles +one task at a time with configurable timeout. + +## Performance + +Under load testing, the system achieves 50k requests/second +with p99 latency under 200ms. + +## Conclusion + +The modular design allows independent scaling of each component. +"#; + + // Index from content string + let result = engine + .index(IndexContext::from_content(content, DocumentFormat::Markdown)) + .await?; + + for item in &result.items { + println!("doc_id: {}", item.doc_id); + println!("name: {}", item.name); + println!("format: {:?}", item.format); + if let Some(m) = &item.metrics { + println!("time: {}ms, nodes: {}", m.total_time_ms(), m.nodes_processed); + } + } + + // Cleanup + for doc in engine.list().await? { + engine.remove(&doc.id).await?; + } + + Ok(()) +} diff --git a/rust/examples/indexing.rs b/rust/examples/indexing.rs index fc764835..53d8fe92 100644 --- a/rust/examples/indexing.rs +++ b/rust/examples/indexing.rs @@ -1,110 +1,46 @@ // Copyright (c) 2026 vectorless developers // SPDX-License-Identifier: Apache-2.0 -//! Index pipeline example for Vectorless. -//! -//! Demonstrates the full indexing flow: create engine → index document → inspect metrics. -//! -//! # Usage +//! 
Batch indexing example — index multiple documents at once. //! //! ```bash //! cargo run --example indexing //! ``` -use vectorless::{EngineBuilder, IndexContext, IndexMode}; +use vectorless::{EngineBuilder, IndexContext}; #[tokio::main] async fn main() -> vectorless::Result<()> { - println!("=== Index Pipeline Example ===\n"); - - // 1. Create engine let engine = EngineBuilder::new() - .with_workspace("./workspace_index_example") - .with_key("sk-...") - .with_model("gpt-4o") + .with_workspace("./workspace_batch_example") + .with_key("sk-or-v1-...") + .with_model("google/gemini-3-flash-preview") + .with_endpoint("http://localhost:4000/api/v1") .build() .await - .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?; - - println!("Engine created\n"); - - // 2. Index a single document with default options - println!("--- Single document (default mode) ---"); - let result = engine.index(IndexContext::from_path("./README.md")).await?; - + .map_err(|e| vectorless::Error::Config(e.to_string()))?; + + // Index multiple files from different paths + let result = engine + .index(IndexContext::from_paths(&[ + "../README.md", + "../CLAUDE.md", + "../LICENSE", + ])) + .await?; + + println!("indexed: {}, failed: {}", result.items.len(), result.failed.len()); for item in &result.items { - println!(" doc_id: {}", item.doc_id); - println!(" name: {}", item.name); - println!(" format: {:?}", item.format); - - if let Some(ref metrics) = item.metrics { - println!(" metrics:"); - println!(" total time: {}ms", metrics.total_time_ms()); - println!(" parse: {}ms", metrics.parse_time_ms); - println!(" build: {}ms", metrics.build_time_ms); - println!(" enhance: {}ms", metrics.enhance_time_ms); - println!(" enrich: {}ms", metrics.enrich_time_ms); - println!(" optimize: {}ms", metrics.optimize_time_ms); - println!(" reasoning: {}ms", metrics.reasoning_index_time_ms); - println!(" nodes: {}", metrics.nodes_processed); - println!(" summaries: {}", 
metrics.summaries_generated); - println!(" llm calls: {}", metrics.llm_calls); - println!(" tokens: {}", metrics.total_tokens_generated); - println!(" topics: {}", metrics.topics_indexed); - println!(" keywords: {}", metrics.keywords_indexed); - } - - // doc_id preserved across the loop for readability - let _doc_id = item.doc_id.clone(); - - // 3. Re-index with incremental mode — should detect no change - println!("\n--- Re-index (incremental, unchanged) ---"); - let result2 = engine - .index(IndexContext::from_path("./README.md").with_mode(IndexMode::Incremental)) - .await?; - - for item in &result2.items { - println!( - " {} (metrics present: {})", - item.doc_id, - item.metrics.is_some() - ); - } - - // 4. Index multiple documents at once - println!("\n--- Batch indexing ---"); - let batch = engine - .index(IndexContext::from_paths(&["./README.md", "./CLAUDE.md"])) - .await?; - - println!( - " indexed: {}, failed: {}", - batch.items.len(), - batch.failed.len() - ); - for item in &batch.items { - let time = item - .metrics - .as_ref() - .map(|m| m.total_time_ms()) - .unwrap_or(0); - let nodes = item - .metrics - .as_ref() - .map(|m| m.nodes_processed) - .unwrap_or(0); - println!(" {} — {}ms, {} nodes", item.name, time, nodes); - } + println!(" {} — doc_id: {}", item.name, item.doc_id); + } + for fail in &result.failed { + println!(" FAILED: {} — {}", fail.source, fail.error); + } - // 5. Cleanup - println!("\n--- Cleanup ---"); - let docs = engine.list().await?; - for doc in &docs { - engine.remove(&doc.id).await?; - } - println!(" removed {} document(s)", docs.len()); + // Cleanup + for doc in engine.list().await? 
{ + engine.remove(&doc.id).await?; } - println!("\n=== Done ==="); Ok(()) } From dc6cdd6ee1d5de752bc3dc7f401c8764b32457d4 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sun, 12 Apr 2026 10:06:54 +0800 Subject: [PATCH 3/5] feat(indexer): add processing statistics to persisted documents Extract node count, token count, and processing duration from document metrics and update processing stats in the persisted document metadata. fix(optimize): prevent merging non-leaf nodes and improve merge logic Change merge_leaf_threshold default to 0 to disable unwanted merging. Improve merge_small_leaves function to only merge adjacent leaf nodes with actual content, preserving section boundaries by adding proper headings when merging content. docs(optimize): clarify merge behavior for leaf nodes only --- rust/src/client/indexer.rs | 15 +++++++- rust/src/index/config.rs | 2 +- rust/src/index/stages/optimize.rs | 61 ++++++++++++++++++++++--------- 3 files changed, 57 insertions(+), 21 deletions(-) diff --git a/rust/src/client/indexer.rs b/rust/src/client/indexer.rs index f0e43890..373be62c 100644 --- a/rust/src/client/indexer.rs +++ b/rust/src/client/indexer.rs @@ -428,13 +428,24 @@ impl IndexerClient { let logic_fp = pipeline_options.logic_fingerprint(); meta = meta.with_logic_fingerprint(logic_fp); - let mut persisted = - PersistedDocument::new(meta, doc.tree.expect("IndexedDocument must have a tree")); + let tree = doc.tree.expect("IndexedDocument must have a tree"); + + // Extract stats from metrics + let node_count = tree.node_count(); + let (summary_tokens, duration_ms) = if let Some(ref m) = doc.metrics { + (m.total_tokens_generated, m.total_time_ms()) + } else { + (0, 0) + }; + + let mut persisted = PersistedDocument::new(meta, tree); for page in doc.pages { persisted.add_page(page.page, &page.content); } + persisted.meta.update_processing_stats(node_count, summary_tokens, duration_ms); + persisted } } diff --git a/rust/src/index/config.rs 
b/rust/src/index/config.rs index f06fa22c..d43d7900 100644 --- a/rust/src/index/config.rs +++ b/rust/src/index/config.rs @@ -55,7 +55,7 @@ impl Default for OptimizationConfig { enabled: true, max_depth: None, max_children: None, - merge_leaf_threshold: 50, + merge_leaf_threshold: 0, } } } diff --git a/rust/src/index/stages/optimize.rs b/rust/src/index/stages/optimize.rs index 9eca0b8f..6b21688f 100644 --- a/rust/src/index/stages/optimize.rs +++ b/rust/src/index/stages/optimize.rs @@ -22,7 +22,11 @@ impl OptimizeStage { Self } - /// Merge adjacent small leaf nodes. + /// Merge adjacent small leaf nodes that are siblings under the same parent. + /// + /// Only merges nodes that are both **leaves** (no children of their own). + /// Non-leaf nodes (section headings with subsections) are never merged, + /// even if their own content is empty. fn merge_small_leaves( tree: &mut crate::document::DocumentTree, min_tokens: usize, ) -> usize { let mut merged_count = 0; - // Get all non-leaf nodes + // Get all non-leaf nodes (parents whose children may be candidates) let non_leaves: Vec<NodeId> = tree .traverse() .into_iter() @@ -43,27 +47,43 @@ continue; } - // Find pairs of adjacent small nodes + // Collect children info: only leaf nodes are merge candidates + let candidates: Vec<(NodeId, usize, bool)> = children + .iter() + .map(|&id| { + let tokens = tree.get(id).and_then(|n| n.token_count).unwrap_or(0); + let is_leaf = tree.is_leaf(id); + (id, tokens, is_leaf) + }) + .collect(); + + // Find pairs of adjacent small leaf siblings let mut i = 0; - while i < children.len() - 1 { - let curr_id = children[i]; - let next_id = children[i + 1]; - - let curr_tokens = tree.get(curr_id).and_then(|n| n.token_count).unwrap_or(0); - let next_tokens = tree.get(next_id).and_then(|n| n.token_count).unwrap_or(0); - - // If both are small, merge next into current - if curr_tokens < min_tokens && next_tokens < min_tokens { - // Merge content 
+ while i < candidates.len() - 1 { + let (curr_id, curr_tokens, curr_is_leaf) = candidates[i]; + let (next_id, next_tokens, next_is_leaf) = candidates[i + 1]; + + // Both must be leaves with actual content, and both must be small + if curr_is_leaf + && next_is_leaf + && curr_tokens > 0 + && curr_tokens < min_tokens + && next_tokens > 0 + && next_tokens < min_tokens + { + // Merge next into current if let Some(next_node) = tree.get(next_id).cloned() { if let Some(curr) = tree.get_mut(curr_id) { if !next_node.content.is_empty() { if !curr.content.is_empty() { - curr.content.push('\n'); + curr.content.push_str("\n\n"); } - curr.content.push_str(&next_node.content); + // Prefix with heading to preserve boundary + curr.content + .push_str(&format!("## {}\n{}", next_node.title, next_node.content)); } - curr.token_count = Some(curr.token_count.unwrap_or(0) + next_tokens); + curr.token_count = + Some(curr.token_count.unwrap_or(0) + next_tokens); } } @@ -86,15 +106,20 @@ impl OptimizeStage { merged_count } - /// Remove empty intermediate nodes. + /// Remove empty intermediate nodes (skip root). 
fn remove_empty_nodes(tree: &mut crate::document::DocumentTree) -> usize { let mut removed_count = 0; + let root = tree.root(); - // Find nodes with no content and only one child + // Find non-root nodes with no content and only one child let candidates: Vec<NodeId> = tree .traverse() .into_iter() .filter(|id| { + // Skip root node + if *id == root { + return false; + } if tree.is_leaf(*id) { return false; } From 5b30f8b52eb24255e78db121d989919996bc290 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sun, 12 Apr 2026 11:16:06 +0800 Subject: [PATCH 4/5] feat(rust/examples): enhance metrics display in index_single example - Replace generic 'm' variable with descriptive 'metrics' variable - Add detailed metrics output including parse, build, enhance, enrich, optimize, and reasoning times - Include additional metrics for nodes processed, summaries generated, LLM calls, tokens generated, topics indexed, and keywords indexed --- rust/examples/index_single.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/rust/examples/index_single.rs b/rust/examples/index_single.rs index aa396395..8218cc00 100644 --- a/rust/examples/index_single.rs +++ b/rust/examples/index_single.rs @@ -58,8 +58,22 @@ The modular design allows independent scaling of each component. 
println!("doc_id: {}", item.doc_id); println!("name: {}", item.name); println!("format: {:?}", item.format); - if let Some(m) = &item.metrics { - println!("time: {}ms, nodes: {}", m.total_time_ms(), m.nodes_processed); + + if let Some(metrics) = &item.metrics { + println!(" metrics:"); + println!(" total time: {}ms", metrics.total_time_ms()); + println!(" parse: {}ms", metrics.parse_time_ms); + println!(" build: {}ms", metrics.build_time_ms); + println!(" enhance: {}ms", metrics.enhance_time_ms); + println!(" enrich: {}ms", metrics.enrich_time_ms); + println!(" optimize: {}ms", metrics.optimize_time_ms); + println!(" reasoning: {}ms", metrics.reasoning_index_time_ms); + println!(" nodes: {}", metrics.nodes_processed); + println!(" summaries: {}", metrics.summaries_generated); + println!(" llm calls: {}", metrics.llm_calls); + println!(" tokens: {}", metrics.total_tokens_generated); + println!(" topics: {}", metrics.topics_indexed); + println!(" keywords: {}", metrics.keywords_indexed); } } From d5263283c7b85ded5422071207ece900f84af8b9 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sun, 12 Apr 2026 11:26:43 +0800 Subject: [PATCH 5/5] fix(rust/examples): update file paths in graph example Change file paths from relative to parent directory to ensure correct file location resolution. --- rust/examples/graph.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/examples/graph.rs b/rust/examples/graph.rs index 141b66df..cdefb451 100644 --- a/rust/examples/graph.rs +++ b/rust/examples/graph.rs @@ -30,7 +30,7 @@ async fn main() -> vectorless::Result<()> { // 2. Index documents — graph is rebuilt automatically let result = engine - .index(IndexContext::from_paths(&["./README.md", "./CLAUDE.md"])) + .index(IndexContext::from_paths(&["../README.md", "../CLAUDE.md"])) .await?; println!("Indexed {} document(s)", result.items.len());