From 68231d4d127324c668701f406c53d3c82fe48c9e Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Mon, 13 Apr 2026 13:08:13 +0800
Subject: [PATCH 1/5] feat(pdf-parser): switch to pdf-extract for reliable
 text extraction

Uses pdf-extract for text extraction, which handles CJK, ToUnicode CMap,
font encoding, and other complex PDF text scenarios more reliably than the
previous lopdf-based approach. Falls back gracefully to basic metadata
extraction when lopdf parsing fails.

BREAKING CHANGE: Changes the internal PDF parsing mechanism from lopdf to
pdf-extract while maintaining the same public API.

feat(toc-processor): add multi-mode extraction with automatic degradation

Introduces a three-mode TOC extraction pipeline with automatic fallback:

1. TocWithPageNumbers - when a TOC with page numbers is available
2. TocWithoutPageNumbers - when a TOC exists but lacks page numbers
3. NoToc - direct structure extraction from content using LLM

Each mode degrades to the next when accuracy thresholds aren't met.

feat(structure-extractor): add LLM-powered structure extraction for no-TOC docs

Implements document structure extraction from page content when no TOC is
available. Groups pages by token count and uses LLM analysis to identify
hierarchical sections. Adds support for continuation across page groups with
overlap handling.

feat(toc-processor): add refinement for oversized TOC entries

Adds the capability to recursively split large TOC entries that span too many
pages or exceed token limits. Uses the same structure extraction approach to
identify sub-sections within oversized entries, improving the granularity of
the document structure.
---
 rust/src/index/parse/pdf/parser.rs            | 241 ++---------
 rust/src/index/parse/toc/mod.rs               |   1 +
 rust/src/index/parse/toc/processor.rs         | 377 +++++++++++++++---
 .../index/parse/toc/structure_extractor.rs    | 362 +++++++++++++++++
 rust/src/index/parse/toc/types.rs             |  40 ++
 5 files changed, 757 insertions(+), 264 deletions(-)
 create mode 100644 rust/src/index/parse/toc/structure_extractor.rs

diff --git a/rust/src/index/parse/pdf/parser.rs b/rust/src/index/parse/pdf/parser.rs
index 4684ae1a..b2ae6b5d 100644
--- a/rust/src/index/parse/pdf/parser.rs
+++ b/rust/src/index/parse/pdf/parser.rs
@@ -1,7 +1,11 @@
 // Copyright (c) 2026 vectorless developers
 // SPDX-License-Identifier: Apache-2.0
 
-//! PDF document parser using lopdf.
+//! PDF document parser.
+//!
+//! Uses [`pdf_extract`] for reliable text extraction (handles CJK, ToUnicode
+//! CMap, font encoding, etc.) and [`lopdf`] only for metadata extraction from
+//! the PDF Info dictionary.
 
 use std::path::Path;
 
@@ -35,7 +39,7 @@ impl Default for PdfParserConfig {
     fn default() -> Self {
         Self {
             max_pages: 0,
-            extract_toc: true, // Default enabled
+            extract_toc: true,
         }
     }
 }
@@ -65,19 +69,42 @@ impl PdfParser {
         bytes: &[u8],
         filename: Option<&str>,
     ) -> Result<PdfParseResult> {
-        let doc = LopdfDocument::load_mem(bytes)
-            .map_err(|e| Error::Parse(format!("Failed to parse PDF: {}", e)))?;
+        // Use pdf-extract for text (handles CJK, ToUnicode CMap, etc.)
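+        // pdf-extract returns one text string per page; empty pages are
+        // dropped inside extract_pages below.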
+        let pages = self.extract_pages(bytes)?;
+
+        // Use lopdf only for metadata; fall back gracefully if it fails
+        let metadata = match LopdfDocument::load_mem(bytes) {
+            Ok(doc) => self.extract_metadata(&doc, filename),
+            Err(_) => PdfMetadata {
+                title: filename.unwrap_or("Document").to_string(),
+                page_count: pages.len(),
+                ..Default::default()
+            },
+        };
 
-        // Extract metadata
-        let metadata = self.extract_metadata(&doc, filename);
+        Ok(PdfParseResult::new(metadata, pages))
+    }
 
-        // Extract pages
-        let pages = self.extract_pages(&doc)?;
+    /// Extract text from all pages using pdf-extract.
+    fn extract_pages(&self, bytes: &[u8]) -> Result<Vec<PdfPage>> {
+        let page_texts = pdf_extract::extract_text_from_mem_by_pages(bytes)
+            .map_err(|e| Error::Parse(format!("pdf-extract failed: {}", e)))?;
 
-        Ok(PdfParseResult::new(metadata, pages))
+        let mut pages = Vec::new();
+        for (i, text) in page_texts.iter().enumerate() {
+            if self.config.max_pages > 0 && i >= self.config.max_pages {
+                break;
+            }
+            let page_num = i + 1; // 1-based
+            if !text.trim().is_empty() {
+                pages.push(PdfPage::new(page_num, text.clone()));
+            }
+        }
+
+        Ok(pages)
     }
 
-    /// Extract metadata from PDF document.
+    /// Extract metadata from PDF Info dictionary via lopdf.
     fn extract_metadata(&self, doc: &LopdfDocument, filename: Option<&str>) -> PdfMetadata {
         let mut metadata = PdfMetadata {
             title: filename.unwrap_or("Document").to_string(),
@@ -85,26 +112,22 @@ impl PdfParser {
             ..Default::default()
         };
 
-        // Try to extract metadata from Info dictionary
         if let Ok(info) = doc.trailer.get(b"Info") {
             if let Ok(info_ref) = info.as_reference() {
                 if let Ok(info_obj) = doc.get_object(info_ref) {
                     if let Ok(dict) = info_obj.as_dict() {
-                        // Title
                         if let Ok(title_obj) = dict.get(b"Title") {
                             if let Ok(title) = title_obj.as_str() {
                                 metadata.title = self.decode_pdf_string(title);
                             }
                         }
 
-                        // Author
                         if let Ok(author_obj) = dict.get(b"Author") {
                             if let Ok(author) = author_obj.as_str() {
                                 metadata.author = Some(self.decode_pdf_string(author));
                             }
                         }
 
-                        // Subject
                        if let Ok(subject_obj) = dict.get(b"Subject") {
                            if let Ok(subject) = subject_obj.as_str() {
                                metadata.subject = Some(self.decode_pdf_string(subject));
@@ -118,158 +141,9 @@ impl PdfParser {
         metadata
     }
 
-    /// Extract text from all pages.
-    fn extract_pages(&self, doc: &LopdfDocument) -> Result<Vec<PdfPage>> {
-        let page_map = doc.get_pages();
-        let mut pages = Vec::new();
-
-        for (i, (page_num, object_id)) in page_map.iter().enumerate() {
-            // Check max pages limit
-            if self.config.max_pages > 0 && i >= self.config.max_pages {
-                break;
-            }
-
-            let text = self.extract_page_text(doc, *object_id, *page_num as usize);
-
-            // Skip empty pages
-            if !text.trim().is_empty() {
-                pages.push(PdfPage::new(*page_num as usize, text));
-            }
-        }
-
-        Ok(pages)
-    }
-
-    /// Extract text from a single page.
-    fn extract_page_text(
-        &self,
-        doc: &LopdfDocument,
-        object_id: lopdf::ObjectId,
-        _page_num: usize,
-    ) -> String {
-        let mut text = String::new();
-
-        if let Ok(page_obj) = doc.get_object(object_id) {
-            if let Ok(page_dict) = page_obj.as_dict() {
-                if let Ok(contents) = page_dict.get(b"Contents") {
-                    match contents {
-                        lopdf::Object::Reference(ref_id) => {
-                            if let Ok(content_obj) = doc.get_object(*ref_id) {
-                                if let Ok(stream) = content_obj.as_stream() {
-                                    text = self.decode_stream_content(stream);
-                                }
-                            }
-                        }
-                        lopdf::Object::Array(arr) => {
-                            for obj in arr {
-                                if let Ok(ref_id) = obj.as_reference() {
-                                    if let Ok(content_obj) = doc.get_object(ref_id) {
-                                        if let Ok(stream) = content_obj.as_stream() {
-                                            let content = self.decode_stream_content(stream);
-                                            if !text.is_empty() {
-                                                text.push('\n');
-                                            }
-                                            text.push_str(&content);
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                        _ => {}
-                    }
-                }
-            }
-        }
-
-        // Post-process text
-        self.post_process_text(&text)
-    }
-
-    /// Decode stream content to text.
-    fn decode_stream_content(&self, stream: &lopdf::Stream) -> String {
-        // Try to decode the stream
-        if let Ok(content) = stream.decompressed_content() {
-            self.extract_text_from_content(&content)
-        } else {
-            self.extract_text_from_content(&stream.content)
-        }
-    }
-
-    /// Extract text from PDF content stream (simplified).
-    fn extract_text_from_content(&self, content: &[u8]) -> String {
-        let content_str = String::from_utf8_lossy(content);
-        let mut text = String::new();
-
-        for line in content_str.lines() {
-            let line = line.trim();
-
-            // Tj operator: (text) Tj
-            if line.ends_with("Tj") {
-                if let Some(text_part) = self.extract_parentheses_text(line) {
-                    text.push_str(&text_part);
-                }
-            }
-            // TJ operator: [(text) ...] TJ
-            else if line.ends_with("TJ") {
-                if let Some(text_parts) = self.extract_array_text(line) {
-                    text.push_str(&text_parts);
-                }
-            }
-        }
-
-        text
-    }
-
-    /// Extract text from parentheses in Tj operator.
-    fn extract_parentheses_text(&self, line: &str) -> Option<String> {
-        let start = line.find('(')?;
-        let end = line.rfind(')')?;
-        if end > start {
-            let raw = &line[start + 1..end];
-            Some(self.decode_pdf_string(raw.as_bytes()))
-        } else {
-            None
-        }
-    }
-
-    /// Extract text from array in TJ operator.
-    fn extract_array_text(&self, line: &str) -> Option<String> {
-        let start = line.find('[')?;
-        let end = line.rfind(']')?;
-        if end > start {
-            let content = &line[start + 1..end];
-            let mut text = String::new();
-
-            let mut in_parens = false;
-            let mut current = String::new();
-
-            for ch in content.chars() {
-                match ch {
-                    '(' => {
-                        in_parens = true;
-                        current.clear();
-                    }
-                    ')' => {
-                        if in_parens {
-                            text.push_str(&self.decode_pdf_string(current.as_bytes()));
-                        }
-                        in_parens = false;
-                    }
-                    _ => {
-                        if in_parens {
-                            current.push(ch);
-                        }
-                    }
-                }
-            }
-
-            Some(text)
-        } else {
-            None
-        }
-    }
-
-    /// Decode PDF string (handle escape sequences).
+    /// Decode PDF string literal (handles escape sequences).
+    ///
+    /// Used only for metadata field values extracted via lopdf.
     fn decode_pdf_string(&self, bytes: &[u8]) -> String {
         let mut result = String::new();
         let mut i = 0;
@@ -299,26 +173,6 @@ impl PdfParser {
         result
     }
 
-    /// Post-process extracted text.
-    fn post_process_text(&self, text: &str) -> String {
-        let mut result = String::new();
-        let mut prev_space = false;
-
-        for ch in text.chars() {
-            if ch.is_whitespace() {
-                if !prev_space {
-                    result.push(' ');
-                    prev_space = true;
-                }
-            } else {
-                result.push(ch);
-                prev_space = false;
-            }
-        }
-
-        result.trim().to_string()
-    }
-
     /// Convert TOC entries to RawNodes.
     fn toc_entries_to_raw_nodes(
         &self,

     ) -> Vec<RawNode> {
         let mut nodes = Vec::new();
 
         for entry in entries {
-            // Get content from the page range
             let content = self.get_content_for_entry(entry, pages);
 
             let mut node = RawNode::new(&entry.title)
@@ -353,12 +206,10 @@ impl PdfParser {
     ) -> String {
         let start_page = entry.physical_page.unwrap_or(1);
 
-        // Find content on this page
         pages
             .iter()
             .find(|p| p.number == start_page)
             .map(|p| {
-                // Try to find the title position and extract content after it
                 let text = &p.text;
                 if let Some(pos) = text.find(&entry.title) {
                     text[pos + entry.title.len()..].trim().to_string()
@@ -445,7 +296,6 @@ impl PdfParser {
             self.pages_to_raw_nodes(&result.pages)
         };
 
-        // Build metadata
         let meta = DocumentMeta {
             name: result.metadata.title,
             format: DocumentFormat::Pdf,
@@ -486,15 +336,4 @@ mod tests {
         let decoded = parser.decode_pdf_string(b"Hello\\nWorld");
         assert_eq!(decoded, "Hello\nWorld");
     }
-
-    #[test]
-    fn test_post_process_text() {
-        let parser = PdfParser::new();
-
-        let processed = parser.post_process_text("Hello World");
-        assert_eq!(processed, "Hello World");
-
-        let processed = parser.post_process_text("  Hello   World  ");
-        assert_eq!(processed, "Hello World");
-    }
 }
diff --git a/rust/src/index/parse/toc/mod.rs b/rust/src/index/parse/toc/mod.rs
index a540cd1a..beac24d7 100644
--- a/rust/src/index/parse/toc/mod.rs
+++ b/rust/src/index/parse/toc/mod.rs
@@ -17,6 +17,7 @@ mod detector;
 mod parser;
 mod processor;
 mod repairer;
+mod structure_extractor;
 mod types;
 mod verifier;
 
diff --git a/rust/src/index/parse/toc/processor.rs b/rust/src/index/parse/toc/processor.rs
index 79ef9a15..978ba4e3 100644
--- a/rust/src/index/parse/toc/processor.rs
+++ b/rust/src/index/parse/toc/processor.rs
@@ -2,6 +2,10 @@
 // SPDX-License-Identifier: Apache-2.0
 
 //! TOC processor - integrates all TOC processing components.
+//!
+//! The processor orchestrates a multi-mode extraction pipeline with automatic
+//! degradation: if one mode fails verification, it falls back to a lower-quality
+//! but more reliable mode.
 
 use tracing::{debug, info, warn};
 
@@ -12,7 +16,8 @@ use super::assigner::{PageAssigner, PageAssignerConfig};
 use super::detector::{TocDetector, TocDetectorConfig};
 use super::parser::{TocParser, TocParserConfig};
 use super::repairer::{IndexRepairer, RepairerConfig};
-use super::types::{TocEntry, VerificationReport};
+use super::structure_extractor::{StructureExtractor, StructureExtractorConfig};
+use super::types::{ProcessingMode, TocEntry, VerificationReport};
 use super::verifier::{IndexVerifier, VerifierConfig};
 
 /// TOC processor configuration.
@@ -33,11 +38,17 @@ pub struct TocProcessorConfig {
     /// Repairer configuration.
     pub repairer: RepairerConfig,
 
-    /// Accuracy threshold for acceptance.
+    /// Accuracy threshold for acceptance (0.0 - 1.0).
     pub accuracy_threshold: f32,
 
-    /// Maximum repair attempts.
+    /// Maximum repair attempts per verification cycle.
     pub max_repair_attempts: usize,
+
+    /// Maximum page span for a single entry before recursive refinement.
+    pub max_pages_per_entry: usize,
+
+    /// Maximum estimated tokens for a single entry before recursive refinement.
+    pub max_tokens_per_entry: usize,
 }
 
 impl Default for TocProcessorConfig {
@@ -50,6 +61,8 @@ impl Default for TocProcessorConfig {
             repairer: RepairerConfig::default(),
             accuracy_threshold: 0.6,
             max_repair_attempts: 3,
+            max_pages_per_entry: 30,
+            max_tokens_per_entry: 20000,
         }
     }
 }
@@ -64,6 +77,18 @@ impl Default for TocProcessorConfig {
 /// 4. **Assign** - Map TOC pages to physical pages
 /// 5. **Verify** - Sample verification of page assignments
 /// 6. **Repair** - Fix incorrect assignments (if needed)
+/// 7. **Refine** - Sub-divide oversized entries (if needed)
+///
+/// # Degradation Strategy
+///
+/// The pipeline tries three modes in order of quality:
+///
+/// 1. `TocWithPageNumbers` - TOC found with page numbers (offset calculation)
+/// 2. `TocWithoutPageNumbers` - TOC found without page numbers (LLM positioning)
+/// 3. `NoToc` - no TOC available (LLM structure extraction from content)
+///
+/// If a mode fails verification (accuracy < threshold), it automatically
+/// degrades to the next mode.
 ///
 /// # Example
 ///
@@ -73,11 +98,9 @@ impl Default for TocProcessorConfig {
 ///
 /// # #[tokio::main]
 /// # async fn main() -> vectorless::Result<()> {
-/// // Parse PDF
 /// let pdf_parser = PdfParser::new();
-/// let result = pdf_parser.parse_file("document.pdf".as_ref())?;
+/// let result = pdf_parser.parse_file("document.pdf".as_ref()).await?;
 ///
-/// // Extract TOC
 /// let processor = TocProcessor::new();
 /// let entries = processor.process(&result.pages).await?;
 ///
@@ -114,9 +137,10 @@ impl TocProcessor {
         }
     }
 
-    /// Process PDF pages and extract TOC.
+    /// Process PDF pages and extract hierarchical structure.
     ///
-    /// This is the main entry point for TOC extraction.
+    /// This is the main entry point. It detects the TOC, selects the best
+    /// processing mode, and automatically degrades if needed.
     pub async fn process(&self, pages: &[PdfPage]) -> Result<Vec<TocEntry>> {
         if pages.is_empty() {
             return Ok(Vec::new());
         }
@@ -126,45 +150,185 @@ impl TocProcessor {
         // Step 1: Detect TOC
         let detection = self.detector.detect(pages).await?;
-
-        if !detection.found {
+
+        // Step 2: Determine initial mode based on detection result
+        let initial_mode = if !detection.found {
             info!("No TOC found in document");
-            return self.process_without_toc(pages).await;
+            ProcessingMode::NoToc
+        } else if detection.has_page_numbers {
+            info!(
+                "TOC found on pages {:?}, has page numbers",
+                detection.pages
+            );
+            ProcessingMode::TocWithPageNumbers
+        } else {
+            info!(
+                "TOC found on pages {:?}, no page numbers",
+                detection.pages
+            );
+            ProcessingMode::TocWithoutPageNumbers
+        };
+
+        // Step 3: Process with degradation
+        let entries = self
+            .process_with_degradation(initial_mode, &detection, pages)
+            .await?;
+
+        // Step 4: Refine oversized entries
+        self.refine_large_entries(entries, pages).await
+    }
+
+    /// Process with automatic mode degradation.
+    ///
+    /// Tries the given mode, verifies the result, and degrades to a
+    /// lower-quality mode if accuracy is below threshold.
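+    ///
+    /// A sketch of the chain, assuming the default accuracy threshold of 0.6:
+    ///
+    /// ```text
+    /// TocWithPageNumbers -> TocWithoutPageNumbers -> NoToc
+    ///   (degrade when accuracy < 0.6 or no entries; NoToc never degrades)
+    /// ```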
+    async fn process_with_degradation(
+        &self,
+        initial_mode: ProcessingMode,
+        detection: &super::types::TocDetection,
+        pages: &[PdfPage],
+    ) -> Result<Vec<TocEntry>> {
+        let mut mode = initial_mode;
+
+        loop {
+            info!("Attempting extraction with mode {:?}", mode);
+
+            let result = match mode {
+                ProcessingMode::TocWithPageNumbers => {
+                    self.process_toc_with_page_numbers(detection, pages).await
+                }
+                ProcessingMode::TocWithoutPageNumbers => {
+                    self.process_toc_without_page_numbers(detection, pages).await
+                }
+                ProcessingMode::NoToc => {
+                    // NoToc always succeeds (produces some structure)
+                    return self.process_without_toc(pages).await;
+                }
+            };
+
+            match result {
+                Ok(entries) if !entries.is_empty() => {
+                    // Verify the entries
+                    let mut mutable_entries = entries;
+                    let report = self
+                        .verify_and_repair(&mut mutable_entries, pages)
+                        .await?;
+
+                    if report.accuracy >= self.config.accuracy_threshold {
+                        info!(
+                            "Mode {:?} succeeded: {} entries, accuracy {:.1}%",
+                            mode,
+                            mutable_entries.len(),
+                            report.accuracy * 100.0
+                        );
+                        return Ok(mutable_entries);
+                    }
+
+                    // Accuracy too low, try degrading
+                    warn!(
+                        "Mode {:?} accuracy {:.1}% below threshold {:.1}%",
+                        mode,
+                        report.accuracy * 100.0,
+                        self.config.accuracy_threshold * 100.0
+                    );
+
+                    match mode.degrade() {
+                        Some(next) => {
+                            info!("Degrading from {:?} to {:?}", mode, next);
+                            mode = next;
+                            // Continue loop with degraded mode
+                        }
+                        None => {
+                            warn!("No further degradation possible, returning best effort");
+                            return Ok(mutable_entries);
+                        }
+                    }
+                }
+                Ok(_) => {
+                    // Empty entries, degrade
+                    warn!("Mode {:?} produced no entries", mode);
+                    match mode.degrade() {
+                        Some(next) => {
+                            mode = next;
+                        }
+                        None => return Ok(Vec::new()),
+                    }
+                }
+                Err(e) => {
+                    warn!("Mode {:?} failed: {}", mode, e);
+                    match mode.degrade() {
+                        Some(next) => {
+                            mode = next;
+                        }
+                        None => return Err(e),
+                    }
+                }
+            }
+        }
+    }
 
-        info!(
-            "TOC found on pages {:?}, has_page_numbers: {}",
-            detection.pages, detection.has_page_numbers
-        );
+    /// Mode 1: TOC with page numbers.
+    ///
+    /// Parse the TOC, calculate physical-page offset from anchor entries,
+    /// and apply the offset to all entries.
+    async fn process_toc_with_page_numbers(
+        &self,
+        detection: &super::types::TocDetection,
+        pages: &[PdfPage],
+    ) -> Result<Vec<TocEntry>> {
+        let toc_text = self.extract_toc_text(pages, &detection.pages);
+        if toc_text.trim().is_empty() {
+            return Ok(Vec::new());
+        }
 
-        // Step 2: Extract TOC text
+        let mut entries = self.parser.parse(&toc_text).await?;
+        if entries.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        // Assign physical pages using offset calculation
+        self.assigner.assign(&mut entries, pages).await?;
+
+        Ok(entries)
+    }
+
+    /// Mode 2: TOC without page numbers.
+    ///
+    /// Parse the TOC, then use LLM to locate each entry in the document.
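+    ///
+    /// Entries keep their parsed titles, but any page numbers picked up by
+    /// the TOC parser are cleared before assignment, since this mode assumes
+    /// they are absent or unreliable.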
+    async fn process_toc_without_page_numbers(
+        &self,
+        detection: &super::types::TocDetection,
+        pages: &[PdfPage],
+    ) -> Result<Vec<TocEntry>> {
         let toc_text = self.extract_toc_text(pages, &detection.pages);
         if toc_text.trim().is_empty() {
-            warn!("TOC text is empty, falling back to structure extraction");
-            return self.process_without_toc(pages).await;
+            return Ok(Vec::new());
         }
 
-        // Step 3: Parse TOC
         let mut entries = self.parser.parse(&toc_text).await?;
         if entries.is_empty() {
-            warn!("No entries parsed from TOC");
             return Ok(Vec::new());
         }
 
-        info!("Parsed {} TOC entries", entries.len());
+        // Clear any TOC page numbers (they're unreliable in this mode)
+        for entry in &mut entries {
+            entry.toc_page = None;
+        }
 
-        // Step 4: Assign physical pages
+        // Assign physical pages using LLM positioning
         self.assigner.assign(&mut entries, pages).await?;
 
-        // Step 5: Verify and repair
-        let report = self.verify_and_repair(&mut entries, pages).await?;
+        Ok(entries)
+    }
 
-        info!(
-            "TOC processing complete: {} entries, accuracy {:.1}%",
-            entries.len(),
-            report.accuracy * 100.0
-        );
+    /// Mode 3: No TOC available.
+    ///
+    /// Extract document structure directly from page content using LLM.
+    async fn process_without_toc(&self, pages: &[PdfPage]) -> Result<Vec<TocEntry>> {
+        info!("Extracting structure from page content (no TOC available)");
 
-        Ok(entries)
+        let extractor = StructureExtractor::new(StructureExtractorConfig::default());
+        extractor.extract(pages).await
     }
 
     /// Extract TOC text from pages.
@@ -177,37 +341,6 @@ impl TocProcessor {
             .join("\n\n")
     }
 
-    /// Process document without TOC (structure extraction).
-    async fn process_without_toc(&self, pages: &[PdfPage]) -> Result<Vec<TocEntry>> {
-        warn!("Processing without TOC - this is a placeholder implementation");
-
-        // TODO: Implement structure extraction for documents without TOC
-        // For now, return a simple structure based on page count
-
-        let mut entries = Vec::new();
-
-        // Group pages into chunks
-        let chunk_size = 10;
-        for chunk in pages.chunks(chunk_size) {
-            let start_page = chunk.first().map(|p| p.number).unwrap_or(1);
-            let end_page = chunk.last().map(|p| p.number).unwrap_or(1);
-
-            let title = if chunk.len() == 1 {
-                format!("Page {}", start_page)
-            } else {
-                format!("Pages {}-{}", start_page, end_page)
-            };
-
-            entries.push(
-                TocEntry::new(title, 1)
-                    .with_physical_page(start_page)
-                    .with_confidence(0.5),
-            );
-        }
-
-        Ok(entries)
-    }
-
     /// Verify entries and repair if needed.
     async fn verify_and_repair(
         &self,
@@ -217,7 +350,6 @@ impl TocProcessor {
         let mut attempts = 0;
 
         while attempts < self.config.max_repair_attempts {
-            // Verify
             let report = self.verifier.verify(entries, pages).await?;
 
             if report.accuracy >= self.config.accuracy_threshold {

                 return Ok(report);
             }

-            // Repair
             let repaired = self.repairer.repair(entries, &report.errors, pages).await?;
 
             if repaired == 0 {

             attempts += 1;
             debug!("Repair attempt {} complete", attempts);
         }
 
-        // Final verification
         self.verifier.verify(entries, pages).await
     }
+
+    /// Refine oversized entries by extracting sub-structure.
+    ///
+    /// Entries that span too many pages or tokens are broken down using
+    /// the same structure extraction approach used for no-TOC documents.
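+    ///
+    /// An entry is refined only when it exceeds *both* limits: with the
+    /// defaults (30 pages, 20k tokens), a 45-page, ~35k-token chapter is
+    /// split, while a 45-page, ~8k-token chapter is kept whole.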
+    async fn refine_large_entries(
+        &self,
+        entries: Vec<TocEntry>,
+        pages: &[PdfPage],
+    ) -> Result<Vec<TocEntry>> {
+        if entries.is_empty() {
+            return Ok(entries);
+        }
+
+        let page_count = pages.len();
+
+        // Pre-compute next-entry page numbers before consuming entries
+        let next_pages: Vec<Option<usize>> = entries
+            .iter()
+            .enumerate()
+            .map(|(i, _)| {
+                entries.get(i + 1).and_then(|e| e.physical_page)
+            })
+            .collect();
+
+        let mut refined = Vec::with_capacity(entries.len());
+
+        for (i, entry) in entries.into_iter().enumerate() {
+            let span = entry_page_span(&entry, next_pages[i], page_count);
+            let tokens = entry_token_count(&entry, pages);
+
+            if span > self.config.max_pages_per_entry
+                && tokens > self.config.max_tokens_per_entry
+            {
+                debug!(
+                    "Refining oversized entry '{}' ({} pages, ~{} tokens)",
+                    entry.title, span, tokens
+                );
+
+                // Extract sub-pages covered by this entry
+                let start = entry.physical_page.unwrap_or(1);
+                let end = next_pages[i].unwrap_or(page_count);
+                let sub_pages: Vec<PdfPage> = pages
+                    .iter()
+                    .filter(|p| p.number >= start && p.number <= end)
+                    .cloned()
+                    .collect();
+
+                if sub_pages.is_empty() {
+                    refined.push(entry);
+                } else {
+                    // Run structure extraction on the sub-pages
+                    let extractor =
+                        StructureExtractor::new(StructureExtractorConfig::default());
+                    match extractor.extract(&sub_pages).await {
+                        Ok(sub_entries) if !sub_entries.is_empty() => {
+                            // If the first sub-entry has the same title as the
+                            // parent, skip it — the parent already represents
+                            // that content's starting point.
+                            let skip = if sub_entries
+                                .first()
+                                .map(|e| e.title.trim() == entry.title.trim())
+                                .unwrap_or(false)
+                            {
+                                1
+                            } else {
+                                0
+                            };
+
+                            for sub in &sub_entries[skip..] {
+                                let level_offset = entry.level;
+                                refined.push(
+                                    TocEntry::new(&sub.title, sub.level + level_offset)
+                                        .with_physical_page(sub.physical_page.unwrap_or(start))
+                                        .with_confidence(sub.confidence * 0.9),
+                                );
+                            }
+
+                            info!(
+                                "Refined '{}' into {} sub-entries",
+                                entry.title,
+                                sub_entries.len() - skip
+                            );
+                        }
+                        Ok(_) => {
+                            debug!("Sub-extraction produced no entries, keeping original");
+                            refined.push(entry);
+                        }
+                        Err(e) => {
+                            warn!("Sub-extraction failed for '{}': {}", entry.title, e);
+                            refined.push(entry);
+                        }
+                    }
+                }
+            } else {
+                refined.push(entry);
+            }
+        }
+
+        Ok(refined)
+    }
 }
 
 impl Default for TocProcessor {
@@ -255,6 +486,26 @@ impl Default for TocProcessor {
     }
 }
 
+/// Calculate how many pages an entry spans.
+///
+/// From its physical_page to the next entry's physical_page (or document end).
+fn entry_page_span(entry: &TocEntry, next_physical_page: Option<usize>, total_pages: usize) -> usize {
+    let start = entry.physical_page.unwrap_or(1);
+    let end = next_physical_page.unwrap_or(total_pages);
+    end.saturating_sub(start)
+}
+
+/// Estimate total tokens for the content covered by an entry.
+fn entry_token_count(entry: &TocEntry, pages: &[PdfPage]) -> usize {
+    let start = entry.physical_page.unwrap_or(1);
+    pages
+        .iter()
+        .filter(|p| p.number >= start)
+        .take(30) // cap at max_pages_per_entry default
+        .map(|p| p.token_count)
+        .sum()
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/rust/src/index/parse/toc/structure_extractor.rs b/rust/src/index/parse/toc/structure_extractor.rs
new file mode 100644
index 00000000..a6dd807d
--- /dev/null
+++ b/rust/src/index/parse/toc/structure_extractor.rs
@@ -0,0 +1,362 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Structure extraction from documents without a Table of Contents.
+//!
+//! When a PDF has no TOC (or all TOC-based extraction modes failed), this
+//! module uses LLM to analyse page content and extract the document's
+//! hierarchical structure directly.
+
+use tracing::{debug, info, warn};
+
+use crate::config::LlmConfig;
+use crate::error::Result;
+use crate::index::parse::pdf::PdfPage;
+
+use super::types::TocEntry;
+use crate::llm::LlmClient;
+
+/// Configuration for structure extraction.
+#[derive(Debug, Clone)]
+pub struct StructureExtractorConfig {
+    /// Maximum estimated tokens per page group sent to LLM.
+    pub max_tokens_per_group: usize,
+
+    /// Number of overlap pages between consecutive groups.
+    pub overlap_pages: usize,
+
+    /// LLM configuration.
+    pub llm_config: LlmConfig,
+}
+
+impl Default for StructureExtractorConfig {
+    fn default() -> Self {
+        Self {
+            max_tokens_per_group: 20_000,
+            overlap_pages: 1,
+            llm_config: LlmConfig::default(),
+        }
+    }
+}
+
+/// A group of consecutive pages with their combined text.
+struct PageGroup {
+    /// Combined text with page markers: `<page_N>\n...\n</page_N>`.
+    text: String,
+    /// Start page number (1-based).
+    start_page: usize,
+    /// End page number (1-based, inclusive).
+    end_page: usize,
+}
+
+/// Extracts document structure from page content using LLM.
+///
+/// Used when a document has no Table of Contents, or when TOC-based extraction
+/// failed. Pages are grouped by token count and analysed sequentially: the
+/// first group generates an initial structure, subsequent groups append to it.
+pub struct StructureExtractor {
+    config: StructureExtractorConfig,
+    client: LlmClient,
+}
+
+impl StructureExtractor {
+    /// Create a new structure extractor.
+    pub fn new(config: StructureExtractorConfig) -> Self {
+        let client = LlmClient::new(config.llm_config.clone().into());
+        Self { config, client }
+    }
+
+    /// Create an extractor with default configuration.
+    pub fn with_defaults() -> Self {
+        Self::new(StructureExtractorConfig::default())
+    }
+
+    /// Extract hierarchical structure from all pages.
+    pub async fn extract(&self, pages: &[PdfPage]) -> Result<Vec<TocEntry>> {
+        if pages.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        let groups = self.group_pages(pages);
+        info!(
+            "Extracting structure from {} pages in {} groups",
+            pages.len(),
+            groups.len()
+        );
+
+        let mut all_entries = Vec::new();
+        let page_count = pages.len();
+
+        for (i, group) in groups.iter().enumerate() {
+            let group_entries = if i == 0 {
+                self.generate_initial(group).await?
+            } else {
+                self.generate_continuation(group, &all_entries).await?
+            };
+
+            debug!(
+                "Group {}/{} (pages {}-{}): extracted {} entries",
+                i + 1,
+                groups.len(),
+                group.start_page,
+                group.end_page,
+                group_entries.len()
+            );
+
+            all_entries.extend(group_entries);
+        }
+
+        // Truncate physical_page values that exceed document length
+        for entry in &mut all_entries {
+            if let Some(p) = entry.physical_page {
+                if p > page_count {
+                    warn!(
+                        "Truncating out-of-range page {} for '{}'",
+                        p, entry.title
+                    );
+                    entry.physical_page = Some(page_count);
+                }
+            }
+        }
+
+        info!("Structure extraction complete: {} entries", all_entries.len());
+        Ok(all_entries)
+    }
+
+    /// Group pages by estimated token count.
+    ///
+    /// Each group stays under `max_tokens_per_group`. Consecutive groups
+    /// overlap by `overlap_pages` pages to avoid splitting content at
+    /// section boundaries.
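+    ///
+    /// For example, with the default 20k-token budget and 1-page overlap,
+    /// ten pages of ~3k tokens each become two groups: pages 1-6 and
+    /// pages 6-10, with page 6 shared between them.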
+    fn group_pages(&self, pages: &[PdfPage]) -> Vec<PageGroup> {
+        let mut groups = Vec::new();
+        let mut group_tokens = 0usize;
+        let mut group_pages_buf = Vec::new();
+
+        for (i, page) in pages.iter().enumerate() {
+            let new_tokens = group_tokens + page.token_count;
+
+            if new_tokens > self.config.max_tokens_per_group && !group_pages_buf.is_empty() {
+                // Finalise current group
+                let text = format_group_text(&group_pages_buf);
+                groups.push(PageGroup {
+                    text,
+                    start_page: group_pages_buf.first().unwrap().number,
+                    end_page: group_pages_buf.last().unwrap().number,
+                });
+
+                // Start new group with overlap
+                let overlap_start = i.saturating_sub(self.config.overlap_pages);
+                group_pages_buf = pages[overlap_start..=i].to_vec();
+                group_tokens = group_pages_buf.iter().map(|p| p.token_count).sum();
+            } else {
+                group_tokens = new_tokens;
+                group_pages_buf.push(page.clone());
+            }
+        }
+
+        // Final group
+        if !group_pages_buf.is_empty() {
+            let text = format_group_text(&group_pages_buf);
+            groups.push(PageGroup {
+                text,
+                start_page: group_pages_buf.first().unwrap().number,
+                end_page: group_pages_buf.last().unwrap().number,
+            });
+        }
+
+        groups
+    }
+
+    /// Generate initial structure from the first page group.
+    async fn generate_initial(&self, group: &PageGroup) -> Result<Vec<TocEntry>> {
+        let system = STRUCTURE_EXTRACTION_SYSTEM_PROMPT;
+        let user = format!(
+            r#"Analyze this document content and extract its hierarchical structure.
+
+Document content:
+{}
+
+Return a JSON array:
+[
+  {{"title": "Section Title", "level": 1, "physical_page": 1}},
+  {{"title": "Subsection", "level": 2, "physical_page": 3}},
+  ...
+]
+
+Rules:
+- "level" reflects the hierarchy (1 = chapter/top, 2 = section, 3 = subsection)
+- "physical_page" is the page number where the section begins
+- Preserve original titles as closely as possible
+- Only output the JSON array, no other text"#,
+            group.text
+        );
+
+        let sections: Vec<ExtractedSection> = self.client.complete_json(system, &user).await?;
+
+        Ok(sections
+            .into_iter()
+            .map(|s| {
+                TocEntry::new(s.title, s.level)
+                    .with_physical_page(s.physical_page)
+                    .with_confidence(0.7)
+            })
+            .collect())
+    }
+
+    /// Continue structure extraction for a subsequent group.
+    ///
+    /// Passes previously extracted entries as context so the LLM can
+    /// continue the structure rather than restart.
+    async fn generate_continuation(
+        &self,
+        group: &PageGroup,
+        previous: &[TocEntry],
+    ) -> Result<Vec<TocEntry>> {
+        let system = STRUCTURE_EXTRACTION_SYSTEM_PROMPT;
+
+        // Summarise previous entries as context
+        let prev_summary = previous
+            .iter()
+            .rev()
+            .take(10)
+            .rev()
+            .map(|e| {
+                format!(
+                    "  {{\"title\": \"{}\", \"level\": {}, \"physical_page\": {}}}",
+                    e.title,
+                    e.level,
+                    e.physical_page.unwrap_or(0)
+                )
+            })
+            .collect::<Vec<_>>()
+            .join(",\n");
+
+        let user = format!(
+            r#"Previously extracted structure:
+[
+{}
+]
+
+Continue extracting structure from these pages:
+{}
+
+Return ONLY the NEW entries (do not repeat previous ones):
+[
+  {{"title": "...", "level": N, "physical_page": M}},
+  ...
+]
+
+If no new structural elements are found, return: []"#,
+            prev_summary, group.text
+        );
+
+        let sections: Vec<ExtractedSection> = self.client.complete_json(system, &user).await?;
+
+        Ok(sections
+            .into_iter()
+            .map(|s| {
+                TocEntry::new(s.title, s.level)
+                    .with_physical_page(s.physical_page)
+                    .with_confidence(0.7)
+            })
+            .collect())
+    }
+}
+
+/// Format pages into tagged text for LLM consumption.
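+///
+/// E.g. two short pages come out as:
+///
+/// ```text
+/// <page_1>
+/// Hello
+/// </page_1>
+///
+/// <page_2>
+/// World
+/// </page_2>
+/// ```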
+fn format_group_text(pages: &[PdfPage]) -> String {
+    pages
+        .iter()
+        .map(|p| {
+            // Truncate individual page text if very long
+            let text = if p.text.len() > 3000 {
+                &p.text[..3000]
+            } else {
+                &p.text
+            };
+            format!("<page_{}>\n{}\n</page_{}>", p.number, text, p.number)
+        })
+        .collect::<Vec<_>>()
+        .join("\n\n")
+}
+
+const STRUCTURE_EXTRACTION_SYSTEM_PROMPT: &str = r#"You are a document structure extraction expert. Your task is to analyze document content and extract its hierarchical structure (chapters, sections, subsections).
+
+For each structural element you find, provide:
+- title: The section title exactly as it appears
+- level: The hierarchy level (1 = chapter/top level, 2 = section, 3 = subsection)
+- physical_page: The page number where this section begins
+
+Important:
+- Focus on genuine structural elements (chapters, sections), not paragraph topics
+- Do NOT include the abstract, summary, or bibliography as structural elements unless they are major sections
+- Be conservative: fewer high-quality entries are better than many low-quality ones"#;
+
+/// LLM response type for structure extraction.
+#[derive(serde::Deserialize)]
+struct ExtractedSection {
+    title: String,
+    level: usize,
+    physical_page: usize,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_default_config() {
+        let config = StructureExtractorConfig::default();
+        assert_eq!(config.max_tokens_per_group, 20_000);
+        assert_eq!(config.overlap_pages, 1);
+    }
+
+    #[test]
+    fn test_group_pages_single_group() {
+        let extractor = StructureExtractor::with_defaults();
+
+        let pages: Vec<PdfPage> = (1..=5)
+            .map(|i| PdfPage::new(i, format!("Page {} content", i)))
+            .collect();
+
+        let groups = extractor.group_pages(&pages);
+        assert_eq!(groups.len(), 1);
+        assert_eq!(groups[0].start_page, 1);
+        assert_eq!(groups[0].end_page, 5);
+    }
+
+    #[test]
+    fn test_group_pages_multiple_groups() {
+        let config = StructureExtractorConfig {
+            max_tokens_per_group: 50,
+            overlap_pages: 1,
+            ..Default::default()
+        };
+        let extractor = StructureExtractor::new(config);
+
+        // Create pages with enough text to span multiple groups
+        let pages: Vec<PdfPage> = (1..=10)
+            .map(|i| {
+                let text = format!("Page {} content. This is a longer text to use more tokens. ", i).repeat(10);
+                PdfPage::new(i, text)
+            })
+            .collect();
+
+        let groups = extractor.group_pages(&pages);
+        assert!(groups.len() > 1, "Expected multiple groups, got {}", groups.len());
+    }
+
+    #[test]
+    fn test_format_group_text() {
+        let pages = vec![
+            PdfPage::new(1, "Hello"),
+            PdfPage::new(2, "World"),
+        ];
+        let text = format_group_text(&pages);
+        assert!(text.contains("<page_1>"));
+        assert!(text.contains("</page_2>"));
+        assert!(text.contains("Hello"));
+        assert!(text.contains("World"));
+    }
+}
diff --git a/rust/src/index/parse/toc/types.rs b/rust/src/index/parse/toc/types.rs
index 9465311b..0438c0d3 100644
--- a/rust/src/index/parse/toc/types.rs
+++ b/rust/src/index/parse/toc/types.rs
@@ -266,6 +266,33 @@ impl VerificationReport {
     }
 }
 
+/// Processing mode for the TOC extraction pipeline.
+///
+/// Modes are ordered by quality: higher modes produce more accurate results
+/// when they succeed, but can degrade to lower modes on failure.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ProcessingMode {
+    /// TOC found with page numbers. Highest quality path.
+    TocWithPageNumbers,
+    /// TOC found without page numbers, or page-number accuracy was too low.
+    TocWithoutPageNumbers,
+    /// No TOC, or all TOC-based modes failed. LLM-driven structure extraction.
+    NoToc,
+}
+
+impl ProcessingMode {
+    /// Degrade to the next lower quality mode.
+    ///
+    /// Returns `None` if already at the lowest mode (`NoToc`).
+    pub fn degrade(self) -> Option<Self> {
+        match self {
+            Self::TocWithPageNumbers => Some(Self::TocWithoutPageNumbers),
+            Self::TocWithoutPageNumbers => Some(Self::NoToc),
+            Self::NoToc => None,
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -307,4 +334,17 @@ mod tests {
             "Title not found on page"
         );
     }
+
+    #[test]
+    fn test_processing_mode_degrade() {
+        assert_eq!(
+            ProcessingMode::TocWithPageNumbers.degrade(),
+            Some(ProcessingMode::TocWithoutPageNumbers)
+        );
+        assert_eq!(
+            ProcessingMode::TocWithoutPageNumbers.degrade(),
+            Some(ProcessingMode::NoToc)
+        );
+        assert_eq!(ProcessingMode::NoToc.degrade(), None);
+    }
 }

From 6ff5abd7986993493db93f9dc3427a7c8f74a2e3 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Mon, 13 Apr 2026 13:44:38 +0800
Subject: [PATCH 2/5] feat(toc): implement concurrent LLM verification for TOC
 entries

- Replace sequential LLM calls with concurrent processing using
  futures::join_all for better performance
- Add concurrent page assignment verification in PageAssigner
- Implement concurrent TOC entry verification in IndexVerifier
- Add concurrent index repair functionality in IndexRepairer
- Refactor methods to static versions for concurrent use
- Improve performance of oversized entry refinement in TocProcessor
---
 rust/src/index/parse/toc/assigner.rs  | 154 ++++++++++++++----------
 rust/src/index/parse/toc/processor.rs | 104 +++++++++--------
 rust/src/index/parse/toc/repairer.rs  | 102 +++++++++++------
 rust/src/index/parse/toc/verifier.rs  |  95 +++++++++-------
 4 files changed, 280 insertions(+), 175 deletions(-)

diff --git a/rust/src/index/parse/toc/assigner.rs b/rust/src/index/parse/toc/assigner.rs
index fc97c420..eefa3769 100644
--- a/rust/src/index/parse/toc/assigner.rs
+++ b/rust/src/index/parse/toc/assigner.rs
@@ -4,6 +4,7 @@
 //! Page assigner - assigns physical page numbers to TOC entries.
 
 use std::collections::HashMap;
+use futures::future::join_all;
 use tracing::{debug, info};
 
 use crate::config::LlmConfig;
@@ -121,7 +122,7 @@ impl PageAssigner {
             .collect()
     }
 
-    /// Calculate page offset by verifying anchors.
+    /// Calculate page offset by verifying anchors concurrently.
     async fn calculate_offset(
         &self,
         anchors: Vec<&TocEntry>,
@@ -132,26 +133,41 @@ impl PageAssigner {
         }
 
         let anchor_count = anchors.len();
-        let mut verified_offsets: Vec<(i32, bool)> = Vec::new();
-
-        for anchor in anchors {
-            let toc_page = anchor.toc_page.unwrap();
-
-            // Find the physical page where this title appears
-            if let Some(physical) = self
-                .locate_title_in_range(anchor.title.as_str(), pages, toc_page)
-                .await?
- { - let offset = physical as i32 - toc_page as i32; - verified_offsets.push((offset, true)); - debug!( - "Anchor '{}' found: toc={}, physical={}, offset={}", - anchor.title, toc_page, physical, offset - ); - } else { - verified_offsets.push((0, false)); - } - } + + // Verify all anchors concurrently + let client = self.client.clone(); + let pages_owned = pages.to_vec(); + let futures: Vec<_> = anchors + .into_iter() + .map(|anchor| { + let title = anchor.title.clone(); + let toc_page = anchor.toc_page.unwrap(); + let client = client.clone(); + let pages = pages_owned.clone(); + + async move { + let range_pages = Self::pages_around(&pages, toc_page, 3); + if range_pages.is_empty() { + return (0, false); + } + + let content = Self::format_range_pages(&range_pages); + match Self::locate_with_client(&client, &title, &content).await { + Ok(Some(physical)) => { + let offset = physical as i32 - toc_page as i32; + debug!( + "Anchor '{}' found: toc={}, physical={}, offset={}", + title, toc_page, physical, offset + ); + (offset, true) + } + _ => (0, false), + } + } + }) + .collect(); + + let verified_offsets = join_all(futures).await; // Calculate the mode (most common offset) let successful: Vec<_> = verified_offsets @@ -164,7 +180,7 @@ impl PageAssigner { return Ok(PageOffset::new(0, 0, 0.0)); } - let mode = self.calculate_mode(&successful); + let mode = Self::calculate_mode_static(&successful); let sample_count = successful.len(); let confidence = sample_count as f32 / anchor_count as f32; @@ -173,6 +189,11 @@ impl PageAssigner { /// Calculate mode of offset values. fn calculate_mode(&self, values: &[i32]) -> i32 { + Self::calculate_mode_static(values) + } + + /// Static version for use in concurrent contexts. + fn calculate_mode_static(values: &[i32]) -> i32 { let mut counts: HashMap = HashMap::new(); for &v in values { *counts.entry(v).or_insert(0) += 1; @@ -184,25 +205,18 @@ impl PageAssigner { .unwrap_or(0) } - /// Locate a title in a range of pages using LLM. - async fn locate_title_in_range( - &self, - title: &str, - pages: &[PdfPage], - near_page: usize, - ) -> Result> { - // Search in a range around the expected page - let start = (near_page.saturating_sub(3)).max(1); - let end = (near_page + 3).min(pages.len()); - - let range_pages: Vec<_> = (start..=end).filter_map(|i| pages.get(i - 1)).collect(); - - if range_pages.is_empty() { - return Ok(None); - } + /// Collect pages around a center page number. + fn pages_around(pages: &[PdfPage], center: usize, range: usize) -> Vec { + let start = center.saturating_sub(range).max(1); + let end = (center + range).min(pages.len()); + (start..=end) + .filter_map(|i| pages.get(i - 1).cloned()) + .collect() + } - // Use LLM to find the exact page - let content = range_pages + /// Format pages into tagged text for LLM. + fn format_range_pages(pages: &[PdfPage]) -> String { + pages .iter() .map(|p| { format!( @@ -213,8 +227,15 @@ impl PageAssigner { ) }) .collect::>() - .join("\n\n"); + .join("\n\n") + } + /// Locate a title in pre-formatted content using LLM (static, for concurrent use). + async fn locate_with_client( + client: &LlmClient, + title: &str, + content: &str, + ) -> Result> { let system = "You are a document analysis assistant. 
         let user = format!(
             r#"Find which page contains the section titled: "{}"

{}

Reply in JSON format:
{{"page": <page number, or null if not found>}}"#,
             title, content
         );

         #[derive(serde::Deserialize)]
         struct LocateResult {
             page: Option<usize>,
         }

-        let result: LocateResult = self.client.complete_json(system, &user).await?;
+        let result: LocateResult = client.complete_json(system, &user).await?;
 
         Ok(result.page)
     }
 
-    /// Assign pages using LLM for each entry.
+    /// Assign pages using LLM for each entry (concurrently).
     async fn assign_with_llm(&self, entries: &mut [TocEntry], pages: &[PdfPage]) -> Result<()> {
         info!("Assigning pages using LLM positioning");
 
-        // Group pages for efficient processing
-        let page_groups = self.group_pages(pages, 5);
+        let client = self.client.clone();
+        let pages_owned = pages.to_vec();
 
-        for entry in entries.iter_mut() {
-            let physical = self
-                .locate_title_in_groups(entry.title.as_str(), &page_groups)
-                .await?;
+        // Launch all entry searches concurrently
+        let futures: Vec<_> = entries
+            .iter()
+            .map(|entry| {
+                let title = entry.title.clone();
+                let client = client.clone();
+                let pages = pages_owned.clone();
+
+                async move {
+                    let groups = Self::group_pages_owned(&pages, 5);
+                    Self::locate_title_in_groups_static(&client, &title, &groups).await
+                }
+            })
+            .collect();
+
+        let results = join_all(futures).await;
+
+        // Write results back
+        for (entry, result) in entries.iter_mut().zip(results.into_iter()) {
+            let physical = result?;
             entry.physical_page = physical;
             entry.confidence = if physical.is_some() { 0.8 } else { 0.3 };
         }
 
         Ok(())
     }
 
-    /// Group pages for batch processing.
-    fn group_pages<'a>(&self, pages: &'a [PdfPage], group_size: usize) -> Vec<Vec<&'a PdfPage>> {
+    /// Group owned pages for batch processing.
+    fn group_pages_owned(pages: &[PdfPage], group_size: usize) -> Vec<Vec<PdfPage>> {
         pages
             .chunks(group_size)
-            .map(|chunk| chunk.iter().collect())
+            .map(|chunk| chunk.to_vec())
             .collect()
     }
 
-    /// Locate a title across page groups.
-    async fn locate_title_in_groups(
-        &self,
+    /// Locate a title across page groups (static, for concurrent use).
+    ///
+    /// Searches groups sequentially (early return on first match),
+    /// but multiple title searches can run concurrently.
+    async fn locate_title_in_groups_static(
+        client: &LlmClient,
         title: &str,
-        groups: &[Vec<&PdfPage>],
+        groups: &[Vec<PdfPage>],
     ) -> Result<Option<usize>> {
         let system = "You are a document analysis assistant. Find which page contains a specific section title.";

             page: Option<usize>,
         }
 
-        let result: SearchResult = self.client.complete_json(system, &user).await?;
+        let result: SearchResult = client.complete_json(system, &user).await?;
 
         if result.found {
             return Ok(result.page);
diff --git a/rust/src/index/parse/toc/processor.rs b/rust/src/index/parse/toc/processor.rs
index 978ba4e3..b2dbc1cd 100644
--- a/rust/src/index/parse/toc/processor.rs
+++ b/rust/src/index/parse/toc/processor.rs
@@ -7,6 +7,7 @@
 //! degradation: if one mode fails verification, it falls back to a lower-quality
 //! but more reliable mode.
+use futures::future::join_all;
 use tracing::{debug, info, warn};
 
 use crate::error::Result;
@@ -393,30 +394,24 @@ impl TocProcessor {
 
         let page_count = pages.len();
 
-        // Pre-compute next-entry page numbers before consuming entries
+        // Pre-compute next-entry page numbers and classify entries
         let next_pages: Vec<Option<usize>> = entries
             .iter()
             .enumerate()
-            .map(|(i, _)| {
-                entries.get(i + 1).and_then(|e| e.physical_page)
-            })
+            .map(|(i, _)| entries.get(i + 1).and_then(|e| e.physical_page))
             .collect();
 
-        let mut refined = Vec::with_capacity(entries.len());
-
-        for (i, entry) in entries.into_iter().enumerate() {
-            let span = entry_page_span(&entry, next_pages[i], page_count);
-            let tokens = entry_token_count(&entry, pages);
-
-            if span > self.config.max_pages_per_entry
-                && tokens > self.config.max_tokens_per_entry
-            {
-                debug!(
-                    "Refining oversized entry '{}' ({} pages, ~{} tokens)",
-                    entry.title, span, tokens
-                );
-
-                // Extract sub-pages covered by this entry
+        // Identify oversized entries and launch extractions concurrently
+        let oversized_futures: Vec<_> = entries
+            .iter()
+            .enumerate()
+            .filter(|(i, entry)| {
+                let span = entry_page_span(entry, next_pages[*i], page_count);
+                let tokens = entry_token_count(entry, pages);
+                span > self.config.max_pages_per_entry
+                    && tokens > self.config.max_tokens_per_entry
+            })
+            .map(|(i, entry)| {
                 let start = entry.physical_page.unwrap_or(1);
                 let end = next_pages[i].unwrap_or(page_count);
                 let sub_pages: Vec<PdfPage> = pages
                     .iter()
                     .filter(|p| p.number >= start && p.number <= end)
                     .cloned()
                     .collect();
 
-                if sub_pages.is_empty() {
-                    refined.push(entry);
-                } else {
-                    // Run structure extraction on the sub-pages
+                let entry_title = entry.title.clone();
+                let entry_level = entry.level;
+
+                async move {
+                    if sub_pages.is_empty() {
+                        return (i, Vec::new());
+                    }
+                    debug!(
+                        "Refining oversized entry '{}' (pages {}-{})",
+                        entry_title, start, end
+                    );
                     let extractor =
                         StructureExtractor::new(StructureExtractorConfig::default());
                     match extractor.extract(&sub_pages).await {
-                        Ok(sub_entries) if !sub_entries.is_empty() => {
-                            // If the first sub-entry has the same title as the
-                            // parent, skip it — the parent already represents
-                            // that content's starting point.
+                        Ok(sub_entries) => {
                             let skip = if sub_entries
                                 .first()
-                                .map(|e| e.title.trim() == entry.title.trim())
+                                .map(|e| e.title.trim() == entry_title.trim())
                                 .unwrap_or(false)
                             {
                                 1
                             } else {
                                 0
                             };
 
-                            for sub in &sub_entries[skip..] {
-                                let level_offset = entry.level;
-                                refined.push(
-                                    TocEntry::new(&sub.title, sub.level + level_offset)
+                            let refined: Vec<TocEntry> = sub_entries[skip..]
+                                .iter()
+                                .map(|sub| {
+                                    TocEntry::new(&sub.title, sub.level + entry_level)
                                         .with_physical_page(sub.physical_page.unwrap_or(start))
-                                        .with_confidence(sub.confidence * 0.9),
-                                );
-                            }
+                                        .with_confidence(sub.confidence * 0.9)
+                                })
+                                .collect();
 
                             info!(
                                 "Refined '{}' into {} sub-entries",
-                                entry.title,
-                                sub_entries.len() - skip
+                                entry_title,
+                                refined.len()
                             );
-                        }
-                        Ok(_) => {
-                            debug!("Sub-extraction produced no entries, keeping original");
-                            refined.push(entry);
+                            (i, refined)
                         }
                         Err(e) => {
-                            warn!("Sub-extraction failed for '{}': {}", entry.title, e);
-                            refined.push(entry);
+                            warn!("Sub-extraction failed for '{}': {}", entry_title, e);
+                            (i, Vec::new())
                         }
                     }
                 }
-            } else {
-                refined.push(entry);
+            })
+            .collect();
+
+        let extraction_results = join_all(oversized_futures).await;
+
+        // Build a lookup from index → refined sub-entries
+        let mut refined_map = std::collections::HashMap::new();
+        for (idx, sub_entries) in extraction_results {
+            if !sub_entries.is_empty() {
+                refined_map.insert(idx, sub_entries);
+            }
+        }
+
+        // Assemble final output
+        let mut result = Vec::with_capacity(entries.len() * 2);
+        for (i, entry) in entries.into_iter().enumerate() {
+            if let Some(sub_entries) = refined_map.remove(&i) {
+                result.extend(sub_entries);
+            } else {
+                result.push(entry);
             }
         }
 
-        Ok(refined)
+        Ok(result)
     }
 }
diff --git a/rust/src/index/parse/toc/repairer.rs b/rust/src/index/parse/toc/repairer.rs
index 4062f215..70498782 100644
--- a/rust/src/index/parse/toc/repairer.rs
+++ b/rust/src/index/parse/toc/repairer.rs
@@ -3,6 +3,7 @@
 
 //! Index repairer - fixes incorrect TOC entry page assignments.
 
+use futures::future::join_all;
 use tracing::{debug, info};
 
 use crate::config::LlmConfig;
@@ -54,7 +55,7 @@ impl IndexRepairer {
         Self::new(RepairerConfig::default())
     }
 
-    /// Repair incorrect entries.
+    /// Repair incorrect entries concurrently.
     pub async fn repair(
         &self,
         entries: &mut [TocEntry],
@@ -66,38 +67,67 @@ impl IndexRepairer {
         }
 
         info!("Repairing {} incorrect entries", errors.len());
-        let mut repaired_count = 0;
-
-        for error in errors {
-            if error.index >= entries.len() {
-                continue;
-            }
-            let entry = &mut entries[error.index];
-            let expected_page = error.expected_page;
-
-            // Search around the expected page
-            let start = expected_page
-                .saturating_sub(self.config.search_range)
-                .max(1);
-            let end = (expected_page + self.config.search_range).min(pages.len());
-
-            if let Some(correct_page) = self
-                .find_correct_page(&entry.title, pages, start..=end)
-                .await?
- { - debug!( - "Repaired '{}' : page {} → {}", - entry.title, expected_page, correct_page - ); - entry.physical_page = Some(correct_page); - entry.confidence = 0.9; - repaired_count += 1; - } else { - debug!( - "Could not repair '{}' (searched pages {}-{})", - entry.title, start, end - ); + // Collect repair tasks (don't borrow entries mutably yet) + let client = self.client.clone(); + let pages_owned = pages.to_vec(); + let search_range = self.config.search_range; + + let tasks: Vec<_> = errors + .iter() + .filter(|error| error.index < entries.len()) + .map(|error| { + let title = entries[error.index].title.clone(); + let expected_page = error.expected_page; + let client = client.clone(); + let pages = pages_owned.clone(); + + async move { + let start = expected_page.saturating_sub(search_range).max(1); + let end = (expected_page + search_range).min(pages.len()); + + let result = Self::find_correct_page_static( + &client, + &title, + &pages, + start..=end, + ) + .await; + + (title, expected_page, result) + } + }) + .collect(); + + let results = join_all(tasks).await; + + // Apply repairs + let mut repaired_count = 0; + for (title, expected_page, result) in results { + match result { + Ok(Some(correct_page)) => { + // Find the corresponding error entry and fix it + if let Some(error) = errors.iter().find(|e| e.title == title) { + if error.index < entries.len() { + debug!( + "Repaired '{}' : page {} → {}", + title, expected_page, correct_page + ); + entries[error.index].physical_page = Some(correct_page); + entries[error.index].confidence = 0.9; + repaired_count += 1; + } + } + } + Ok(None) => { + debug!( + "Could not repair '{}' (searched around page {})", + title, expected_page + ); + } + Err(e) => { + debug!("Repair failed for '{}': {}", title, e); + } } } @@ -105,9 +135,9 @@ impl IndexRepairer { Ok(repaired_count) } - /// Find the correct page for a title within a range. - async fn find_correct_page( - &self, + /// Find the correct page for a title within a range (static, for concurrent use). + async fn find_correct_page_static( + client: &LlmClient, title: &str, pages: &[PdfPage], range: std::ops::RangeInclusive, @@ -152,7 +182,7 @@ Reply in JSON format: page: Option, } - let result: FindResult = self.client.complete_json(system, &user).await?; + let result: FindResult = client.complete_json(system, &user).await?; if result.found { Ok(result.page) diff --git a/rust/src/index/parse/toc/verifier.rs b/rust/src/index/parse/toc/verifier.rs index d0c3883e..42186a09 100644 --- a/rust/src/index/parse/toc/verifier.rs +++ b/rust/src/index/parse/toc/verifier.rs @@ -3,6 +3,7 @@ //! Index verifier - verifies TOC entry page assignments. +use futures::future::join_all; use rand::seq::SliceRandom; use tracing::{debug, info}; @@ -55,6 +56,8 @@ impl IndexVerifier { } /// Verify TOC entries against PDF pages. + /// + /// All sample entries are verified concurrently via LLM calls. 
     pub async fn verify(
         &self,
         entries: &[TocEntry],
@@ -64,38 +67,58 @@ impl IndexVerifier {
             return Ok(VerificationReport::all_correct(0));
         }
 
-        // Select sample
         let sample = self.select_sample(entries);
 
-        // Verify each sample entry
+        // Launch all verification checks concurrently
+        let client = self.client.clone();
+        let futures: Vec<_> = sample
+            .iter()
+            .map(|(index, entry)| {
+                let index = *index;
+                let title = entry.title.clone();
+                let physical_page = entry.physical_page;
+                let client = client.clone();
+                let pages = pages.to_vec();
+
+                async move {
+                    match physical_page {
+                        Some(page) => {
+                            let result =
+                                Self::verify_entry_with_client(&client, &title, page, &pages).await;
+                            (index, title, page, result)
+                        }
+                        None => (
+                            index,
+                            title,
+                            0,
+                            Ok(Err(ErrorType::PageOutOfRange)),
+                        ),
+                    }
+                }
+            })
+            .collect();
+
+        let results = join_all(futures).await;
+
+        // Aggregate results
+        let total = results.len();
         let mut errors = Vec::new();
         let mut correct = 0;
 
-        for (index, entry) in &sample {
-            if let Some(physical_page) = entry.physical_page {
-                match self.verify_entry(entry, physical_page, pages).await? {
-                    Ok(()) => correct += 1,
-                    Err(error_type) => {
-                        errors.push(VerificationError::new(
-                            *index,
-                            entry.title.clone(),
-                            physical_page,
-                            error_type,
-                        ));
-                    }
+        for (index, title, page, result) in results {
+            match result {
+                Ok(Ok(())) => correct += 1,
+                Ok(Err(error_type)) => {
+                    errors.push(VerificationError::new(index, title, page, error_type));
+                }
+                Err(e) => {
+                    debug!("Verification LLM call failed: {}", e);
+                    errors.push(VerificationError::new(index, title, page, ErrorType::TitleNotFound));
                 }
-            } else {
-                // No physical page assigned
-                errors.push(VerificationError::new(
-                    *index,
-                    entry.title.clone(),
-                    0,
-                    ErrorType::PageOutOfRange,
-                ));
             }
         }
 
-        let report = VerificationReport::new(sample.len(), correct, errors);
+        let report = VerificationReport::new(total, correct, errors);
         info!(
             "Verification complete: {}/{} correct ({:.1}% accuracy)",
             report.correct,
@@ -126,28 +149,23 @@ impl IndexVerifier {
         }
     }
 
-    /// Verify a single entry.
-    async fn verify_entry(
-        &self,
-        entry: &TocEntry,
+    /// Verify a single entry using a cloned client (for concurrent use).
+    async fn verify_entry_with_client(
+        client: &LlmClient,
+        title: &str,
         physical_page: usize,
         pages: &[PdfPage],
     ) -> Result<Result<(), ErrorType>> {
-        // Check page bounds
         if physical_page == 0 || physical_page > pages.len() {
             return Ok(Err(ErrorType::PageOutOfRange));
         }
 
         let page = &pages[physical_page - 1];
 
-        // Use LLM to check if title appears on this page
-        let found = self.check_title_on_page(&entry.title, &page.text).await?;
+        let found = Self::check_title_on_page_with_client(client, title, &page.text).await?;
 
         if !found {
-            debug!(
-                "Title '{}' not found on page {}",
-                entry.title, physical_page
-            );
+            debug!("Title '{}' not found on page {}", title, physical_page);
             return Ok(Err(ErrorType::TitleNotFound));
         }
 
     }
 
     /// Check if a title appears on a page using LLM.
-    async fn check_title_on_page(&self, title: &str, page_text: &str) -> Result<bool> {
+    async fn check_title_on_page_with_client(
+        client: &LlmClient,
+        title: &str,
+        page_text: &str,
+    ) -> Result<bool> {
         let system = "You are a document analysis assistant. Determine if a section title appears in the given text.";
-        // Truncate page text if too long
         let text = if page_text.len() > 1000 {
             &page_text[..1000]
         } else {
             page_text
         };

             found: bool,
         }
 
-        let result: CheckResult = self.client.complete_json(system, &user).await?;
+        let result: CheckResult = client.complete_json(system, &user).await?;
 
         Ok(result.found)
     }

From e8c10b0b9d41d41bd666a867359de6fb7f97db26 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Mon, 13 Apr 2026 14:09:00 +0800
Subject: [PATCH 3/5] feat: add PDF indexing example with comprehensive
 documentation

- Create index_pdf.rs example demonstrating PDF indexing capabilities
- Implement automatic PDF format detection and hierarchical document parsing
- Add support for environment variable configuration for LLM settings
- Include detailed usage instructions with command-line examples
- Integrate error handling and process exit codes for invalid inputs
- Provide comprehensive metrics output including timing and processing stats
- Add automatic workspace cleanup after indexing operations
---
 rust/examples/index_pdf.rs     | 100 +++++++++++++++++++++++++++++++++
 samples/Docker_Cheat_Sheet.pdf | Bin 0 -> 25326 bytes
 2 files changed, 100 insertions(+)
 create mode 100644 rust/examples/index_pdf.rs
 create mode 100755 samples/Docker_Cheat_Sheet.pdf

diff --git a/rust/examples/index_pdf.rs b/rust/examples/index_pdf.rs
new file mode 100644
index 00000000..244ca6a2
--- /dev/null
+++ b/rust/examples/index_pdf.rs
@@ -0,0 +1,100 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! PDF indexing example — index a PDF document via the vectorless engine.
+//!
+//! ```bash
+//! # Using environment variables for LLM config:
+//! LLM_API_KEY=sk-xxx LLM_MODEL=google/gemini-3-flash-preview \
+//! cargo run --example index_pdf -- ../samples/Docker_Cheat_Sheet.pdf
+//!
+//! # Or with defaults (edit the code to set your key/endpoint):
+//! cargo run --example index_pdf -- ../samples/Docker_Cheat_Sheet.pdf
+//! ```
+
+use std::path::Path;
+
+use vectorless::{EngineBuilder, IndexContext};
+
+#[tokio::main]
+async fn main() -> vectorless::Result<()> {
+    let args: Vec<String> = std::env::args().collect();
+
+    let pdf_path = args.get(1).map(|s| s.as_str()).unwrap_or_else(|| {
+        eprintln!("Usage: cargo run --example index_pdf -- <path-to-pdf>");
+        std::process::exit(1);
+    });
+
+    if !Path::new(pdf_path).exists() {
+        eprintln!("Error: file not found: {}", pdf_path);
+        std::process::exit(1);
+    }
+
+    println!("=== Indexing PDF: {} ===\n", pdf_path);
+
+    // Build engine with LLM configuration from environment or defaults.
+    // Adjust the defaults below to match your setup.
+    let api_key = std::env::var("LLM_API_KEY")
+        .unwrap_or_else(|_| "sk-or-v1-...".to_string());
+    let model = std::env::var("LLM_MODEL")
+        .unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string());
+    let endpoint = std::env::var("LLM_ENDPOINT")
+        .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string());
+
+    let engine = EngineBuilder::new()
+        .with_workspace("./workspace_pdf_example")
+        .with_key(&api_key)
+        .with_model(&model)
+        .with_endpoint(&endpoint)
+        .build()
+        .await
+        .map_err(|e| vectorless::Error::Config(e.to_string()))?;
+
+    // Index the PDF — format is auto-detected from the .pdf extension.
+    // The engine will:
+    //   1. Extract text from every page
+    //   2. Detect and parse the Table of Contents
+    //   3. Build a hierarchical document tree
+    //   4. Generate summaries for each node (LLM)
+    //   5. Build a reasoning index for retrieval
Build a reasoning index for retrieval + let result = engine + .index(IndexContext::from_path(pdf_path)) + .await?; + + println!( + "Indexed: {}, Failed: {}", + result.items.len(), + result.failed.len() + ); + + for item in &result.items { + println!("\n--- {} ---", item.name); + println!("doc_id: {}", item.doc_id); + println!("format: {:?}", item.format); + + if let Some(metrics) = &item.metrics { + println!("\nMetrics:"); + println!(" total time: {}ms", metrics.total_time_ms()); + println!(" parse: {}ms", metrics.parse_time_ms); + println!(" build: {}ms", metrics.build_time_ms); + println!(" enhance: {}ms", metrics.enhance_time_ms); + println!(" nodes: {}", metrics.nodes_processed); + println!(" summaries: {}", metrics.summaries_generated); + println!(" llm calls: {}", metrics.llm_calls); + println!(" tokens: {}", metrics.total_tokens_generated); + println!(" topics: {}", metrics.topics_indexed); + println!(" keywords: {}", metrics.keywords_indexed); + } + } + + for fail in &result.failed { + eprintln!("FAILED: {} — {}", fail.source, fail.error); + } + + // Cleanup workspace + for doc in engine.list().await? { + engine.remove(&doc.id).await?; + } + + Ok(()) +} diff --git a/samples/Docker_Cheat_Sheet.pdf b/samples/Docker_Cheat_Sheet.pdf new file mode 100755 index 0000000000000000000000000000000000000000..0768f1c3eb59a04f87de07c384f32795f187f210 GIT binary patch literal 25326
[25,326 bytes of base85-encoded binary patch data omitted]
From: zTgx <747674262@qq.com> Date: Mon, 13 Apr 2026 14:25:23 +0800 Subject: [PATCH 4/5] feat(examples): add environment variable support for LLM configuration Add support for configuring LLM settings through environment variables (LLM_API_KEY, LLM_MODEL, LLM_ENDPOINT) that override config file values. Update all examples to demonstrate both the environment-variable and the default config-file approach, with updated documentation. All example files now share a consistent configuration method, so settings can be changed at runtime without editing source code or config files. Related fixes to workspace cleanup and metrics display formatting are included as part of the refactoring.
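The override pattern each example adopts is small enough to sketch on its own. The helper below is illustrative rather than part of this patch; it assumes only the `EngineBuilder` setters (`with_key`, `with_model`, `with_endpoint`) that the examples already call:

```rust
use vectorless::EngineBuilder;

// Illustrative helper (not in this patch): apply LLM_* environment
// overrides to an already-configured builder. Because these setters run
// after the config file (or defaults) have been applied, values from the
// environment always win.
fn apply_env_overrides(mut builder: EngineBuilder) -> EngineBuilder {
    if let Ok(key) = std::env::var("LLM_API_KEY") {
        builder = builder.with_key(&key);
    }
    if let Ok(model) = std::env::var("LLM_MODEL") {
        builder = builder.with_model(&model);
    }
    if let Ok(endpoint) = std::env::var("LLM_ENDPOINT") {
        builder = builder.with_endpoint(&endpoint);
    }
    builder
}
```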
--- rust/examples/advanced.rs | 25 +++++++++++++----- rust/examples/events.rs | 19 ++++++++++++-- rust/examples/flow.rs | 30 +++++++++++++++------- rust/examples/graph.rs | 18 ++++++++++--- rust/examples/index_incremental.rs | 22 +++++++++++++--- rust/examples/index_single.rs | 41 ++++++++++++++++-------------- rust/examples/indexing.rs | 22 +++++++++++++--- 7 files changed, 130 insertions(+), 47 deletions(-) diff --git a/rust/examples/advanced.rs b/rust/examples/advanced.rs index a5c367b4..602fa435 100644 --- a/rust/examples/advanced.rs +++ b/rust/examples/advanced.rs @@ -9,10 +9,10 @@ //! # Usage //! //! ```bash -//! # First, copy the example config and edit it -//! cp config.toml ./my_vectorless.toml -//! # Edit my_vectorless.toml to customize settings +//! # Using environment variables for LLM config (overrides config file): +//! LLM_API_KEY=sk-xxx LLM_MODEL=gpt-4o cargo run --example advanced //! +//! # Or with defaults (using config file): //! cargo run --example advanced //! ``` @@ -24,8 +24,21 @@ async fn main() -> vectorless::Result<()> { // Load all settings from the specified config file. // The config file must include api_key and model. - let client = EngineBuilder::new() - .with_config_path("./config.toml") + // If environment variables are set, they override the config file values. + let mut builder = EngineBuilder::new().with_config_path("./config.toml"); + + // Override config with env vars if present + if let Ok(api_key) = std::env::var("LLM_API_KEY") { + builder = builder.with_key(&api_key); + } + if let Ok(model) = std::env::var("LLM_MODEL") { + builder = builder.with_model(&model); + } + if let Ok(endpoint) = std::env::var("LLM_ENDPOINT") { + builder = builder.with_endpoint(&endpoint); + } + + let client = builder .build() .await .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?; @@ -56,4 +69,4 @@ async fn main() -> vectorless::Result<()> { println!("\n=== Done ==="); Ok(()) -} +} \ No newline at end of file diff --git a/rust/examples/events.rs b/rust/examples/events.rs index 65176751..59d8e3d2 100644 --- a/rust/examples/events.rs +++ b/rust/examples/events.rs @@ -11,6 +11,11 @@ //! # Usage //! //! ```bash +//! # Using environment variables for LLM config: +//! LLM_API_KEY=sk-xxx LLM_MODEL=gpt-4o \ +//! LLM_ENDPOINT=https://api.openai.com/v1 cargo run --example events +//! +//! # Or with defaults (edit the code to set your key/endpoint): //! cargo run --example events //! ``` @@ -90,12 +95,22 @@ async fn main() -> Result<(), Box> { println!(" ✓ Event handlers configured\n"); + // Build engine with LLM configuration from environment or defaults. + // Adjust the defaults below to match your setup. + let api_key = std::env::var("LLM_API_KEY") + .unwrap_or_else(|_| "sk-...".to_string()); + let model = std::env::var("LLM_MODEL") + .unwrap_or_else(|_| "gpt-4o".to_string()); + let endpoint = std::env::var("LLM_ENDPOINT") + .unwrap_or_else(|_| "https://api.openai.com/v1".to_string()); + // 2. Create engine with events println!("Step 2: Creating engine with event emitter..."); let engine = EngineBuilder::new() .with_workspace("./workspace_events_example") - .with_key("sk-...") - .with_model("gpt-4o") + .with_key(&api_key) + .with_model(&model) + .with_endpoint(&endpoint) .with_events(events) .build() .await diff --git a/rust/examples/flow.rs b/rust/examples/flow.rs index ff1b6ca7..4778bd44 100644 --- a/rust/examples/flow.rs +++ b/rust/examples/flow.rs @@ -12,6 +12,11 @@ //! # Usage //! //! ```bash +//! 
# Using environment variables for LLM config: +//! LLM_API_KEY=sk-xxx LLM_MODEL=gpt-4o \ +//! LLM_ENDPOINT=https://api.openai.com/v1 cargo run --example flow +//! +//! # Or with defaults (edit the code to set your key/endpoint): //! cargo run --example flow //! ``` @@ -54,14 +59,23 @@ async fn main() -> vectorless::Result<()> { println!("=== Vectorless Flow Example ===\n"); + // Build engine with LLM configuration from environment or defaults. + // Adjust the defaults below to match your setup. + let api_key = std::env::var("LLM_API_KEY") + .unwrap_or_else(|_| "sk-...".to_string()); + let model = std::env::var("LLM_MODEL") + .unwrap_or_else(|_| "gpt-4o".to_string()); + let endpoint = std::env::var("LLM_ENDPOINT") + .unwrap_or_else(|_| "https://api".to_string()); + // Step 1: Create a Vectorless client println!("Step 1: Creating Vectorless client..."); let engine = EngineBuilder::new() .with_workspace("./workspace_flow_example") - .with_key("sk...") - .with_model("gpt-4o") - .with_endpoint("https://api") + .with_key(&api_key) + .with_model(&model) + .with_endpoint(&endpoint) .build() .await .map_err(|e| vectorless::Error::Config(e.to_string()))?; @@ -130,12 +144,10 @@ async fn main() -> vectorless::Result<()> { println!(); } - // Step 5: Cleanup - println!("Step 5: Cleanup..."); - - // engine.remove(&doc_id).await?; - // println!(" - Document removed") + // Cleanup + for doc in engine.list().await? { + engine.remove(&doc.id).await?; + } - println!("\n=== Example Complete ==="); Ok(()) } diff --git a/rust/examples/graph.rs index cdefb451..61033da1 100644 --- a/rust/examples/graph.rs +++ b/rust/examples/graph.rs @@ -10,6 +10,11 @@ //! # Usage //! //! ```bash +//! # Using environment variables for LLM config: +//! LLM_API_KEY=sk-xxx LLM_MODEL=gpt-4o \ +//! cargo run --example graph +//! +//! # Or with defaults (edit the code to set your key/endpoint): //! cargo run --example graph //! ``` @@ -19,11 +24,18 @@ use vectorless::{EngineBuilder, IndexContext}; async fn main() -> vectorless::Result<()> { println!("=== Document Graph Example ===\n"); + // Build engine with LLM configuration from environment or defaults. + // Adjust the defaults below to match your setup. + let api_key = std::env::var("LLM_API_KEY") + .unwrap_or_else(|_| "sk-...".to_string()); + let model = std::env::var("LLM_MODEL") + .unwrap_or_else(|_| "gpt-4o".to_string()); + // 1. Create engine let engine = EngineBuilder::new() .with_workspace("./workspace_graph_example") - .with_key("sk-...") - .with_model("gpt-4o") + .with_key(&api_key) + .with_model(&model) .build() .await .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?; @@ -91,4 +103,4 @@ async fn main() -> vectorless::Result<()> { println!("\n=== Done ==="); Ok(()) -} +} \ No newline at end of file diff --git a/rust/examples/index_incremental.rs index 6b710a93..078a5ed0 100644 --- a/rust/examples/index_incremental.rs +++ b/rust/examples/index_incremental.rs @@ -4,6 +4,11 @@ //! Incremental indexing example — re-index with change detection. //! //! ```bash +//! # Using environment variables for LLM config: +//! LLM_API_KEY=sk-xxx LLM_MODEL=google/gemini-3-flash-preview \ +//! LLM_ENDPOINT=http://localhost:4000/api/v1 cargo run --example index_incremental +//! +//! # Or with defaults (edit the code to set your key/endpoint): //! cargo run --example index_incremental //!
``` @@ -11,11 +16,20 @@ use vectorless::{DocumentFormat, EngineBuilder, IndexContext, IndexMode}; #[tokio::main] async fn main() -> vectorless::Result<()> { + // Build engine with LLM configuration from environment or defaults. + // Adjust the defaults below to match your setup. + let api_key = std::env::var("LLM_API_KEY") + .unwrap_or_else(|_| "sk-or-v1-...".to_string()); + let model = std::env::var("LLM_MODEL") + .unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); + let endpoint = std::env::var("LLM_ENDPOINT") + .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string()); + let engine = EngineBuilder::new() .with_workspace("./workspace_incremental_example") - .with_key("sk-or-v1-...") - .with_model("google/gemini-3-flash-preview") - .with_endpoint("http://localhost:4000/api/v1") + .with_key(&api_key) + .with_model(&model) + .with_endpoint(&endpoint) .build() .await .map_err(|e| vectorless::Error::Config(e.to_string()))?; @@ -93,4 +107,4 @@ Deletes a user by their unique identifier. } Ok(()) -} +} \ No newline at end of file diff --git a/rust/examples/index_single.rs b/rust/examples/index_single.rs index 3a5632f0..4fe0a522 100644 --- a/rust/examples/index_single.rs +++ b/rust/examples/index_single.rs @@ -4,6 +4,11 @@ //! Single document indexing example — index one document from content. //! //! ```bash +//! # Using environment variables for LLM config: +//! LLM_API_KEY=sk-xxx LLM_MODEL=google/gemini-3-flash-preview \ +//! LLM_ENDPOINT=http://localhost:4000/api/v1 cargo run --example index_single +//! +//! # Or with defaults (edit the code to set your key/endpoint): //! cargo run --example index_single //! ``` @@ -11,11 +16,20 @@ use vectorless::{DocumentFormat, EngineBuilder, IndexContext}; #[tokio::main] async fn main() -> vectorless::Result<()> { + // Build engine with LLM configuration from environment or defaults. + // Adjust the defaults below to match your setup. 
+ let api_key = std::env::var("LLM_API_KEY") + .unwrap_or_else(|_| "sk-or-v1-...".to_string()); + let model = std::env::var("LLM_MODEL") + .unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); + let endpoint = std::env::var("LLM_ENDPOINT") + .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string()); + let engine = EngineBuilder::new() .with_workspace("./workspace_single_example") - .with_key("sk-or-v1-...") - .with_model("google/gemini-3-flash-preview") - .with_endpoint("http://localhost:4000/api/v1") + .with_key(&api_key) + .with_model(&model) + .with_endpoint(&endpoint) .build() .await .map_err(|e| vectorless::Error::Config(e.to_string()))?; @@ -69,21 +83,10 @@ Monitoring is implemented using a Prometheus and Grafana stack, with custom metr println!("name: {}", item.name); println!("format: {:?}", item.format); - if let Some(metrics) = &item.metrics { - println!(" metrics:"); - println!(" total time: {}ms", metrics.total_time_ms()); - println!(" parse: {}ms", metrics.parse_time_ms); - println!(" build: {}ms", metrics.build_time_ms); - println!(" enhance: {}ms", metrics.enhance_time_ms); - println!(" enrich: {}ms", metrics.enrich_time_ms); - println!(" optimize: {}ms", metrics.optimize_time_ms); - println!(" reasoning: {}ms", metrics.reasoning_index_time_ms); - println!(" nodes: {}", metrics.nodes_processed); - println!(" summaries: {}", metrics.summaries_generated); - println!(" llm calls: {}", metrics.llm_calls); - println!(" tokens: {}", metrics.total_tokens_generated); - println!(" topics: {}", metrics.topics_indexed); - println!(" keywords: {}", metrics.keywords_indexed); + if let Some(ref metrics) = item.metrics { + println!("time: {}ms", metrics.total_time_ms()); + println!("nodes: {}", metrics.nodes_processed); + println!("tokens: {}", metrics.total_tokens_generated); } } @@ -93,4 +96,4 @@ Monitoring is implemented using a Prometheus and Grafana stack, with custom metr } Ok(()) -} +} \ No newline at end of file diff --git a/rust/examples/indexing.rs b/rust/examples/indexing.rs index 53d8fe92..ecc0eb83 100644 --- a/rust/examples/indexing.rs +++ b/rust/examples/indexing.rs @@ -4,6 +4,11 @@ //! Batch indexing example — index multiple documents at once. //! //! ```bash +//! # Using environment variables for LLM config: +//! LLM_API_KEY=sk-xxx LLM_MODEL=google/gemini-3-flash-preview \ +//! LLM_ENDPOINT=http://localhost:4000/api/v1 cargo run --example indexing +//! +//! # Or with defaults (edit the code to set your key/endpoint): //! cargo run --example indexing //! ``` @@ -11,11 +16,20 @@ use vectorless::{EngineBuilder, IndexContext}; #[tokio::main] async fn main() -> vectorless::Result<()> { + // Build engine with LLM configuration from environment or defaults. + // Adjust the defaults below to match your setup. 
+ let api_key = std::env::var("LLM_API_KEY") + .unwrap_or_else(|_| "sk-or-v1-...".to_string()); + let model = std::env::var("LLM_MODEL") + .unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); + let endpoint = std::env::var("LLM_ENDPOINT") + .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string()); + let engine = EngineBuilder::new() .with_workspace("./workspace_batch_example") - .with_key("sk-or-v1-...") - .with_model("google/gemini-3-flash-preview") - .with_endpoint("http://localhost:4000/api/v1") + .with_key(&api_key) + .with_model(&model) + .with_endpoint(&endpoint) .build() .await .map_err(|e| vectorless::Error::Config(e.to_string()))?; @@ -43,4 +57,4 @@ async fn main() -> vectorless::Result<()> { } Ok(()) -} +} \ No newline at end of file From 524c3423d8aa0beea8e4ef4ab94f62fae044bb10 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Mon, 13 Apr 2026 15:56:20 +0800 Subject: [PATCH 5/5] feat: add tracing initialization to examples and enhance PDF parsing - Add tracing_subscriber::fmt::init() to all examples for debug output - Modify parse functions to accept optional LLM client for enhanced PDF processing - Update PDF parser to use external LLM client for TOC extraction and structure analysis - Add with_llm_client constructors to TOC processing components - Improve error handling in event example by removing redundant error mapping - Update examples to use cleaner output formatting and better documentation --- rust/examples/advanced.rs | 3 + rust/examples/events.rs | 93 ++++++------------- rust/examples/graph.rs | 3 + rust/examples/index_incremental.rs | 3 + rust/examples/index_pdf.rs | 28 ++++-- rust/examples/index_single.rs | 3 + rust/examples/indexing.rs | 26 +++--- rust/src/index/parse/mod.rs | 29 +++++- rust/src/index/parse/pdf/parser.rs | 39 ++++++-- rust/src/index/parse/toc/assigner.rs | 8 ++ rust/src/index/parse/toc/detector.rs | 14 +++ rust/src/index/parse/toc/parser.rs | 8 ++ rust/src/index/parse/toc/processor.rs | 41 +++++++- rust/src/index/parse/toc/repairer.rs | 8 ++ .../index/parse/toc/structure_extractor.rs | 5 + rust/src/index/parse/toc/verifier.rs | 8 ++ rust/src/index/pipeline/executor.rs | 3 +- rust/src/index/stages/enhance.rs | 6 ++ rust/src/index/stages/parse.rs | 24 ++++- 19 files changed, 247 insertions(+), 105 deletions(-) diff --git a/rust/examples/advanced.rs index 602fa435..a75608d1 100644 --- a/rust/examples/advanced.rs +++ b/rust/examples/advanced.rs @@ -20,6 +20,9 @@ use vectorless::{EngineBuilder, IndexContext, QueryContext}; #[tokio::main] async fn main() -> vectorless::Result<()> { + // Initialize tracing for debug output (set RUST_LOG=debug to see more) + tracing_subscriber::fmt::init(); + println!("=== Vectorless Advanced Example (Config File) ===\n"); // Load all settings from the specified config file. diff --git a/rust/examples/events.rs index 59d8e3d2..b0433dc7 100644 --- a/rust/examples/events.rs +++ b/rust/examples/events.rs @@ -27,6 +27,9 @@ use vectorless::events::{EventEmitter, IndexEvent, QueryEvent}; #[tokio::main] async fn main() -> Result<(), Box<dyn std::error::Error>> { + // Initialize tracing for debug output (set RUST_LOG=debug to see more) + tracing_subscriber::fmt::init(); + println!("=== Event Callbacks Example ===\n"); // 1.
Create event emitter with handlers @@ -113,79 +116,43 @@ async fn main() -> Result<(), Box> { .with_endpoint(&endpoint) .with_events(events) .build() - .await - .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?; + .await?; println!(" ✓ Engine created\n"); - // 3. Index a document (events will fire) - println!("Step 3: Indexing document (watch events)...\n"); - - let temp_dir = tempfile::tempdir()?; - let doc_content = r#"# Example Document - -## Introduction - -This is an example document for demonstrating event callbacks. - -## Features - -- Event monitoring for indexing -- Event monitoring for queries -- Progress tracking - -## Architecture - -The event system uses handlers that can be attached to the engine builder. -"#; - - let doc_path = temp_dir.path().join("example.md"); - tokio::fs::write(&doc_path, doc_content).await?; - - let index_result = engine.index(IndexContext::from_path(&doc_path)).await?; - let doc_id = index_result.doc_id().unwrap().to_string(); - println!(); - - // 4. Query the document (events will fire) - println!("Step 4: Querying document (watch events)...\n"); - + // 3. Index a document with events + println!("Step 3: Indexing document (with events)..."); let result = engine - .query(QueryContext::new("What features are available?").with_doc_id(&doc_id)) + .index(IndexContext::from_path("../README.md")) .await?; - println!(); + let doc_id = result.doc_id().unwrap().to_string(); + println!(" ✓ Indexed: {doc_id}\n"); - // 5. Show results - println!("Step 5: Query result:"); + // 4. Query with events + println!("Step 4: Querying (with events)..."); + let result = engine + .query( + QueryContext::new("What is vectorless?") + .with_doc_id(&doc_id) + ) + .await?; if let Some(item) = result.single() { - println!(" - Score: {:.2}", item.score); - println!(" - Nodes: {}", item.node_ids.len()); + println!(" ✓ Found result ({} chars)", item.content.len()); if !item.content.is_empty() { - let preview: String = item.content.chars().take(100).collect(); - println!(" - Content: {}...", preview); + let preview: String = item.content.chars().take(200).collect(); + println!(" Preview: {}...", preview); } } - println!(); - - // 6. Show statistics - println!("Step 6: Event statistics:"); - println!( - " - Index events fired: {}", - index_count.load(Ordering::SeqCst) - ); - println!( - " - Query events fired: {}", - query_count.load(Ordering::SeqCst) - ); - println!( - " - Nodes visited: {}", - nodes_visited.load(Ordering::SeqCst) - ); - println!(); - - // 7. Cleanup - println!("Step 7: Cleanup..."); + + // 5. Stats + println!("\n--- Stats ---"); + println!(" Documents indexed: {}", index_count.load(Ordering::SeqCst)); + println!(" Queries executed: {}", query_count.load(Ordering::SeqCst)); + println!(" Nodes visited: {}", nodes_visited.load(Ordering::SeqCst)); + + // Cleanup engine.remove(&doc_id).await?; - println!(" ✓ Document removed\n"); + println!("\n Cleaned up"); - println!("=== Example Complete ==="); + println!("\n=== Done ==="); Ok(()) } diff --git a/rust/examples/graph.rs b/rust/examples/graph.rs index 61033da1..ac87a673 100644 --- a/rust/examples/graph.rs +++ b/rust/examples/graph.rs @@ -22,6 +22,9 @@ use vectorless::{EngineBuilder, IndexContext}; #[tokio::main] async fn main() -> vectorless::Result<()> { + // Initialize tracing for debug output (set RUST_LOG=debug to see more) + tracing_subscriber::fmt::init(); + println!("=== Document Graph Example ===\n"); // Build engine with LLM configuration from environment or defaults. 
diff --git a/rust/examples/index_incremental.rs index 078a5ed0..32254d7d 100644 --- a/rust/examples/index_incremental.rs +++ b/rust/examples/index_incremental.rs @@ -16,6 +16,9 @@ use vectorless::{DocumentFormat, EngineBuilder, IndexContext, IndexMode}; #[tokio::main] async fn main() -> vectorless::Result<()> { + // Initialize tracing for debug output (set RUST_LOG=debug to see more) + tracing_subscriber::fmt::init(); + // Build engine with LLM configuration from environment or defaults. // Adjust the defaults below to match your setup. let api_key = std::env::var("LLM_API_KEY") diff --git a/rust/examples/index_pdf.rs index 244ca6a2..c7840e14 100644 --- a/rust/examples/index_pdf.rs +++ b/rust/examples/index_pdf.rs @@ -18,6 +18,10 @@ use vectorless::{EngineBuilder, IndexContext}; #[tokio::main] async fn main() -> vectorless::Result<()> { + // Initialize tracing so we can see pipeline logs. + // Set RUST_LOG=info or RUST_LOG=debug for more detail. + tracing_subscriber::fmt::init(); + let args: Vec<String> = std::env::args().collect(); let pdf_path = args.get(1).map(|s| s.as_str()).unwrap_or_else(|| { @@ -33,7 +37,6 @@ async fn main() -> vectorless::Result<()> { println!("=== Indexing PDF: {} ===\n", pdf_path); // Build engine with LLM configuration from environment or defaults. - // Adjust the defaults below to match your setup. let api_key = std::env::var("LLM_API_KEY") .unwrap_or_else(|_| "sk-or-v1-...".to_string()); let model = std::env::var("LLM_MODEL") @@ -41,6 +44,13 @@ async fn main() -> vectorless::Result<()> { let endpoint = std::env::var("LLM_ENDPOINT") .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string()); + tracing::info!( + "LLM config — key: {}..., model: {}, endpoint: {}", + &api_key[..api_key.len().min(8)], + model, + endpoint + ); + let engine = EngineBuilder::new() .with_workspace("./workspace_pdf_example") .with_key(&api_key) @@ -50,13 +60,6 @@ async fn main() -> vectorless::Result<()> { .await .map_err(|e| vectorless::Error::Config(e.to_string()))?; - // Index the PDF — format is auto-detected from the .pdf extension. - // The engine will: - // 1. Extract text from every page - // 2. Detect and parse the Table of Contents - // 3. Build a hierarchical document tree - // 4. Generate summaries for each node (LLM) - // 5. Build a reasoning index for retrieval let result = engine .index(IndexContext::from_path(pdf_path)) .await?; @@ -84,6 +87,13 @@ async fn main() -> vectorless::Result<()> { println!(" tokens: {}", metrics.total_tokens_generated); println!(" topics: {}", metrics.topics_indexed); println!(" keywords: {}", metrics.keywords_indexed); + + if metrics.llm_calls == 0 { + println!("\n *** WARNING: No LLM calls were made. ***"); + println!(" Set RUST_LOG=info to see pipeline logs:"); + println!(" RUST_LOG=info cargo run --example index_pdf -- <pdf-path>"); + println!(" Check LLM_API_KEY, LLM_MODEL, and LLM_ENDPOINT are valid."); + } } } @@ -91,7 +101,7 @@ async fn main() -> vectorless::Result<()> { eprintln!("FAILED: {} — {}", fail.source, fail.error); } - // Cleanup workspace + // Cleanup workspace after the run (remove this loop to keep the index) for doc in engine.list().await?
{ engine.remove(&doc.id).await?; } diff --git a/rust/examples/index_single.rs b/rust/examples/index_single.rs index 4fe0a522..55ec52d5 100644 --- a/rust/examples/index_single.rs +++ b/rust/examples/index_single.rs @@ -16,6 +16,9 @@ use vectorless::{DocumentFormat, EngineBuilder, IndexContext}; #[tokio::main] async fn main() -> vectorless::Result<()> { + // Initialize tracing for debug output (set RUST_LOG=debug to see more) + tracing_subscriber::fmt::init(); + // Build engine with LLM configuration from environment or defaults. // Adjust the defaults below to match your setup. let api_key = std::env::var("LLM_API_KEY") diff --git a/rust/examples/indexing.rs b/rust/examples/indexing.rs index ecc0eb83..e4489d29 100644 --- a/rust/examples/indexing.rs +++ b/rust/examples/indexing.rs @@ -1,7 +1,7 @@ // Copyright (c) 2026 vectorless developers // SPDX-License-Identifier: Apache-2.0 -//! Batch indexing example — index multiple documents at once. +//! Batch indexing example — index multiple documents via the vectorless engine. //! //! ```bash //! # Using environment variables for LLM config: @@ -16,6 +16,9 @@ use vectorless::{EngineBuilder, IndexContext}; #[tokio::main] async fn main() -> vectorless::Result<()> { + // Initialize tracing for debug output (set RUST_LOG=debug to see more) + tracing_subscriber::fmt::init(); + // Build engine with LLM configuration from environment or defaults. // Adjust the defaults below to match your setup. let api_key = std::env::var("LLM_API_KEY") @@ -34,21 +37,20 @@ async fn main() -> vectorless::Result<()> { .await .map_err(|e| vectorless::Error::Config(e.to_string()))?; - // Index multiple files from different paths + // Index multiple documents in a single call. + // Paths are resolved relative to the workspace directory. let result = engine - .index(IndexContext::from_paths(&[ - "../README.md", - "../CLAUDE.md", - "../LICENSE", - ])) + .index( + IndexContext::from_paths(&["../README.md", "../CLAUDE.md"])) .await?; - println!("indexed: {}, failed: {}", result.items.len(), result.failed.len()); + println!("Indexed {} document(s)", result.items.len()); for item in &result.items { - println!(" {} — doc_id: {}", item.name, item.doc_id); - } - for fail in &result.failed { - println!(" FAILED: {} — {}", fail.source, fail.error); + println!(" - {} ({})", item.name, item.doc_id); + if let Some(metrics) = &item.metrics { + println!(" Time: {}ms", metrics.total_time_ms()); + println!(" Nodes: {}", metrics.nodes_processed); + } } // Cleanup diff --git a/rust/src/index/parse/mod.rs b/rust/src/index/parse/mod.rs index 9fd5a042..0bcba9f4 100644 --- a/rust/src/index/parse/mod.rs +++ b/rust/src/index/parse/mod.rs @@ -27,9 +27,14 @@ use std::path::Path; use crate::error::Result; use crate::index::parse::markdown::MarkdownParser; +use crate::llm::LlmClient; /// Parse a string content document. 
-pub async fn parse_content(content: &str, format: DocumentFormat) -> Result<ParseResult> { +pub async fn parse_content( + content: &str, + format: DocumentFormat, + _llm_client: Option<LlmClient>, +) -> Result<ParseResult> { match format { DocumentFormat::Markdown => { let parser = MarkdownParser::new(); @@ -42,21 +47,32 @@ pub async fn parse_content(content: &str, format: DocumentFormat) -> Result<ParseResult> { /// Parse a file. -pub async fn parse_file(path: &Path, format: DocumentFormat) -> Result<ParseResult> { +pub async fn parse_file( + path: &Path, + format: DocumentFormat, + llm_client: Option<LlmClient>, +) -> Result<ParseResult> { match format { DocumentFormat::Markdown => { let parser = MarkdownParser::new(); parser.parse_file(path).await } DocumentFormat::Pdf => { - let parser = pdf::PdfParser::new(); + let parser = match llm_client { + Some(client) => pdf::PdfParser::with_llm_client(client), + None => pdf::PdfParser::new(), + }; parser.parse_file(path).await } } } /// Parse binary data. -pub async fn parse_bytes(bytes: &[u8], format: DocumentFormat) -> Result<ParseResult> { +pub async fn parse_bytes( + bytes: &[u8], + format: DocumentFormat, + llm_client: Option<LlmClient>, +) -> Result<ParseResult> { match format { DocumentFormat::Markdown => { let content = std::str::from_utf8(bytes) @@ -65,7 +81,10 @@ pub async fn parse_bytes(bytes: &[u8], format: DocumentFormat) -> Result<ParseResult> DocumentFormat::Pdf => { - let parser = pdf::PdfParser::new(); + let parser = match llm_client { + Some(client) => pdf::PdfParser::with_llm_client(client), + None => pdf::PdfParser::new(), + }; parser.parse_bytes_async(bytes, None).await } } diff --git a/rust/src/index/parse/pdf/parser.rs index b2ae6b5d..7702872b 100644 --- a/rust/src/index/parse/pdf/parser.rs +++ b/rust/src/index/parse/pdf/parser.rs @@ -15,14 +15,16 @@ use tracing::{info, warn}; use crate::Error; use crate::error::Result; use crate::index::parse::toc::TocProcessor; +use crate::llm::LlmClient; use super::types::{PdfMetadata, PdfPage, PdfParseResult}; use crate::index::parse::{DocumentFormat, DocumentMeta, ParseResult, RawNode}; /// PDF document parser. -#[derive(Debug, Clone)] pub struct PdfParser { config: PdfParserConfig, + /// Optional LLM client for TOC extraction and structure analysis. + llm_client: Option<LlmClient>, } /// PDF parser configuration. @@ -50,17 +52,31 @@ impl PdfParser { Self::default() } + /// Create a PDF parser with an externally provided LLM client. + pub fn with_llm_client(client: LlmClient) -> Self { + Self { + config: PdfParserConfig::default(), + llm_client: Some(client), + } + } + /// Create a parser with custom configuration. pub fn with_config(config: PdfParserConfig) -> Self { - Self { config } + Self { + config, + llm_client: None, + } } /// Create a parser without TOC extraction. pub fn without_toc() -> Self { - Self::with_config(PdfParserConfig { - extract_toc: false, - ..Default::default() - }) + Self { + config: PdfParserConfig { + extract_toc: false, + ..Default::default() + }, + llm_client: None, + } } /// Parse PDF from bytes and return raw pages.
@@ -274,7 +290,16 @@ impl PdfParser { let nodes = if self.config.extract_toc { info!("Extracting TOC from PDF with {} pages", page_count); - let processor = TocProcessor::new(); + let processor = match &self.llm_client { + Some(client) => { + info!("PdfParser: creating TocProcessor with LLM client"); + TocProcessor::with_llm_client(client.clone()) + } + None => { + info!("PdfParser: creating TocProcessor without LLM client (no key configured)"); + TocProcessor::new() + } + }; match processor.process(&result.pages).await { Ok(entries) if !entries.is_empty() => { info!("Extracted {} TOC entries", entries.len()); diff --git a/rust/src/index/parse/toc/assigner.rs index eefa3769..beff3021 100644 --- a/rust/src/index/parse/toc/assigner.rs +++ b/rust/src/index/parse/toc/assigner.rs @@ -50,6 +50,14 @@ impl PageAssigner { Self { config, client } } + /// Create an assigner with an externally provided LLM client. + pub fn with_client(client: LlmClient) -> Self { + Self { + config: PageAssignerConfig::default(), + client, + } + } + /// Create an assigner with default configuration. pub fn with_defaults() -> Self { Self::new(PageAssignerConfig::default()) } diff --git a/rust/src/index/parse/toc/detector.rs index f179c507..032a18af 100644 --- a/rust/src/index/parse/toc/detector.rs +++ b/rust/src/index/parse/toc/detector.rs @@ -74,6 +74,20 @@ impl TocDetector { } } + /// Create a detector with an externally provided LLM client. + pub fn with_client(config: TocDetectorConfig, client: LlmClient) -> Self { + let use_llm = config.use_llm_fallback; + Self { + config, + llm_client: if use_llm { + Some(client) + } else { + None + }, + patterns: Self::build_patterns(), + } + } + /// Create a detector with default configuration. pub fn with_defaults() -> Self { Self::new(TocDetectorConfig::default()) } diff --git a/rust/src/index/parse/toc/parser.rs index 20b61af2..06aaade3 100644 --- a/rust/src/index/parse/toc/parser.rs +++ b/rust/src/index/parse/toc/parser.rs @@ -47,6 +47,14 @@ impl TocParser { Self { config, client } } + /// Create a parser with an externally provided LLM client. + pub fn with_client(client: LlmClient) -> Self { + Self { + config: TocParserConfig::default(), + client, + } + } + /// Create a parser with default configuration. pub fn with_defaults() -> Self { Self::new(TocParserConfig::default()) } diff --git a/rust/src/index/parse/toc/processor.rs index b2dbc1cd..9ed2c95b 100644 --- a/rust/src/index/parse/toc/processor.rs +++ b/rust/src/index/parse/toc/processor.rs @@ -12,6 +12,7 @@ use tracing::{debug, info, warn}; use crate::error::Result; use crate::index::parse::pdf::PdfPage; +use crate::llm::LlmClient; use super::assigner::{PageAssigner, PageAssignerConfig}; use super::detector::{TocDetector, TocDetectorConfig}; @@ -118,6 +119,8 @@ pub struct TocProcessor { assigner: PageAssigner, verifier: IndexVerifier, repairer: IndexRepairer, + /// Optional LLM client for StructureExtractor (no-TOC mode and refinement). + llm_client: Option<LlmClient>, } impl TocProcessor { @@ -126,14 +129,34 @@ Self::with_config(TocProcessorConfig::default()) } + /// Create a TOC processor with an externally provided LLM client. + /// + /// All sub-components (detector, parser, assigner, verifier, repairer) + /// will use this client instead of creating their own from default config.
+ pub fn with_llm_client(client: LlmClient) -> Self { + info!("TocProcessor: created with external LLM client"); + let config = TocProcessorConfig::default(); + Self { + detector: TocDetector::with_client(config.detector.clone(), client.clone()), + parser: TocParser::with_client(client.clone()), + assigner: PageAssigner::with_client(client.clone()), + verifier: IndexVerifier::with_client(client.clone()), + repairer: IndexRepairer::with_client(client.clone()), + llm_client: Some(client), + config, + } + } + /// Create a TOC processor with custom configuration. pub fn with_config(config: TocProcessorConfig) -> Self { + info!("TocProcessor: created with config (no external LLM client)"); Self { detector: TocDetector::new(config.detector.clone()), parser: TocParser::new(config.parser.clone()), assigner: PageAssigner::new(config.assigner.clone()), verifier: IndexVerifier::new(config.verifier.clone()), repairer: IndexRepairer::new(config.repairer.clone()), + llm_client: None, config, } } @@ -328,7 +351,12 @@ impl TocProcessor { async fn process_without_toc(&self, pages: &[PdfPage]) -> Result> { info!("Extracting structure from page content (no TOC available)"); - let extractor = StructureExtractor::new(StructureExtractorConfig::default()); + let extractor = match &self.llm_client { + Some(client) => { + StructureExtractor::with_client(StructureExtractorConfig::default(), client.clone()) + } + None => StructureExtractor::new(StructureExtractorConfig::default()), + }; extractor.extract(pages).await } @@ -402,6 +430,7 @@ impl TocProcessor { .collect(); // Identify oversized entries and launch extractions concurrently + let llm_client = self.llm_client.clone(); let oversized_futures: Vec<_> = entries .iter() .enumerate() @@ -422,6 +451,7 @@ impl TocProcessor { let entry_title = entry.title.clone(); let entry_level = entry.level; + let llm_client = llm_client.clone(); async move { if sub_pages.is_empty() { @@ -431,8 +461,13 @@ impl TocProcessor { "Refining oversized entry '{}' (pages {}-{})", entry_title, start, end ); - let extractor = - StructureExtractor::new(StructureExtractorConfig::default()); + let extractor = match &llm_client { + Some(client) => StructureExtractor::with_client( + StructureExtractorConfig::default(), + client.clone(), + ), + None => StructureExtractor::new(StructureExtractorConfig::default()), + }; match extractor.extract(&sub_pages).await { Ok(sub_entries) => { let skip = if sub_entries diff --git a/rust/src/index/parse/toc/repairer.rs b/rust/src/index/parse/toc/repairer.rs index 70498782..51931674 100644 --- a/rust/src/index/parse/toc/repairer.rs +++ b/rust/src/index/parse/toc/repairer.rs @@ -50,6 +50,14 @@ impl IndexRepairer { Self { config, client } } + /// Create a repairer with an externally provided LLM client. + pub fn with_client(client: LlmClient) -> Self { + Self { + config: RepairerConfig::default(), + client, + } + } + /// Create a repairer with default configuration. pub fn with_defaults() -> Self { Self::new(RepairerConfig::default()) diff --git a/rust/src/index/parse/toc/structure_extractor.rs b/rust/src/index/parse/toc/structure_extractor.rs index a6dd807d..17511b36 100644 --- a/rust/src/index/parse/toc/structure_extractor.rs +++ b/rust/src/index/parse/toc/structure_extractor.rs @@ -66,6 +66,11 @@ impl StructureExtractor { Self { config, client } } + /// Create a structure extractor with an externally provided LLM client. 
+ pub fn with_client(config: StructureExtractorConfig, client: LlmClient) -> Self { + Self { config, client } + } + /// Create an extractor with default configuration. pub fn with_defaults() -> Self { Self::new(StructureExtractorConfig::default()) } diff --git a/rust/src/index/parse/toc/verifier.rs index 42186a09..09b28059 100644 --- a/rust/src/index/parse/toc/verifier.rs +++ b/rust/src/index/parse/toc/verifier.rs @@ -50,6 +50,14 @@ impl IndexVerifier { Self { config, client } } + /// Create a verifier with an externally provided LLM client. + pub fn with_client(client: LlmClient) -> Self { + Self { + config: VerifierConfig::default(), + client, + } + } + /// Create a verifier with default configuration. pub fn with_defaults() -> Self { Self::new(VerifierConfig::default()) } diff --git a/rust/src/index/pipeline/executor.rs index a80cf176..1538c7b3 100644 --- a/rust/src/index/pipeline/executor.rs +++ b/rust/src/index/pipeline/executor.rs @@ -81,8 +81,9 @@ impl PipelineExecutor { /// 7. `reasoning_index` - Build pre-computed reasoning index /// 8. `optimize` - Optimize tree pub fn with_llm(client: LlmClient) -> Self { + tracing::info!("PipelineExecutor::with_llm — cloning client to ParseStage + EnhanceStage"); let orchestrator = PipelineOrchestrator::new() - .stage_with_priority(ParseStage::new(), 10) + .stage_with_priority(ParseStage::with_llm_client(client.clone()), 10) .stage_with_priority(BuildStage::new(), 20) .stage_with_priority(ValidateStage::new(), 22) .stage_with_priority(SplitStage::new(), 25) diff --git a/rust/src/index/stages/enhance.rs index 452089c0..5550de45 100644 --- a/rust/src/index/stages/enhance.rs +++ b/rust/src/index/stages/enhance.rs @@ -109,6 +109,12 @@ impl IndexStage for EnhanceStage { async fn execute(&mut self, ctx: &mut IndexContext) -> Result { let start = Instant::now(); + info!( + "EnhanceStage: llm_client={}, strategy={:?}", + self.llm_client.is_some(), + ctx.options.summary_strategy + ); + // Check if we need summaries if !self.needs_summaries(ctx) { info!( diff --git a/rust/src/index/stages/parse.rs index 98ef911b..6c8166b6 100644 --- a/rust/src/index/stages/parse.rs +++ b/rust/src/index/stages/parse.rs @@ -15,12 +15,22 @@ use crate::index::IndexMode; use crate::index::pipeline::{IndexContext, IndexInput}; /// Parse stage - extracts raw nodes from documents. -pub struct ParseStage; +pub struct ParseStage { + /// Optional LLM client for PDF structure extraction. + llm_client: Option<crate::llm::LlmClient>, +} impl ParseStage { /// Create a new parse stage. pub fn new() -> Self { - Self + Self { llm_client: None } + } + + /// Create a parse stage with an LLM client. + pub fn with_llm_client(client: crate::llm::LlmClient) -> Self { + Self { + llm_client: Some(client), + } + } /// Detect document format from path and options. @@ -61,6 +71,10 @@ impl IndexStage for ParseStage { ctx.format = format; info!("Parsing document with format: {:?}", format); + info!( + "ParseStage llm_client present: {}", + self.llm_client.is_some() + ); // Parse based on input type let result = match &ctx.input { @@ -77,7 +91,7 @@ .to_string(); // Parse directly - crate::index::parse::parse_file(&path, format).await? + crate::index::parse::parse_file(&path, format, self.llm_client.clone()).await?
} IndexInput::Content { content, @@ -88,14 +102,14 @@ impl IndexStage for ParseStage { ctx.name = name.clone(); // Parse content directly - crate::index::parse::parse_content(content, *format).await? + crate::index::parse::parse_content(content, *format, self.llm_client.clone()).await? } IndexInput::Bytes { data, name, format } => { // Set name ctx.name = name.clone(); // Parse bytes - crate::index::parse::parse_bytes(data, *format).await? + crate::index::parse::parse_bytes(data, *format, self.llm_client.clone()).await? } };
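Taken together, the new `parse_*` entry points thread a single optional `LlmClient` from the pipeline down to the PDF parser. A minimal in-crate caller would look like the sketch below; it assumes only the signatures introduced in this patch, and that a client has already been built elsewhere (for example by `PipelineExecutor::with_llm`):

```rust
use std::path::Path;

use crate::error::Result;
use crate::index::parse::{parse_file, DocumentFormat, ParseResult};
use crate::llm::LlmClient;

// Sketch: opting a PDF into LLM-backed TOC/structure extraction.
// Passing `None` keeps the heuristic-only path, exactly as before this patch.
async fn parse_pdf(path: &Path, client: Option<LlmClient>) -> Result<ParseResult> {
    parse_file(path, DocumentFormat::Pdf, client).await
}
```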