Skip to content
Merged

Dev #49

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,7 @@ wheels/
.ruff_cache/
.venv/
venv/
ENV/
ENV/

# Test workspace
workspace*
2 changes: 1 addition & 1 deletion rust/examples/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ async fn main() -> vectorless::Result<()> {

// 2. Index documents — graph is rebuilt automatically
let result = engine
.index(IndexContext::from_paths(&["./README.md", "./CLAUDE.md"]))
.index(IndexContext::from_paths(&["../README.md", "../CLAUDE.md"]))
.await?;

println!("Indexed {} document(s)", result.items.len());
Expand Down
96 changes: 96 additions & 0 deletions rust/examples/index_incremental.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright (c) 2026 vectorless developers
// SPDX-License-Identifier: Apache-2.0

//! Incremental indexing example — re-index with change detection.
//!
//! ```bash
//! cargo run --example index_incremental
//! ```

use vectorless::{DocumentFormat, EngineBuilder, IndexContext, IndexMode};

#[tokio::main]
async fn main() -> vectorless::Result<()> {
let engine = EngineBuilder::new()
.with_workspace("./workspace_incremental_example")
.with_key("sk-or-v1-...")
.with_model("google/gemini-3-flash-preview")
.with_endpoint("http://localhost:4000/api/v1")
.build()
.await
.map_err(|e| vectorless::Error::Config(e.to_string()))?;

let content_v1 = r#"# API Reference

## GET /users

Returns a list of all users in the system.

## POST /users

Creates a new user account.
"#;

let content_v2 = r#"# API Reference

## GET /users

Returns a paginated list of users. Supports `?page=` and `?limit=` parameters.

## POST /users

Creates a new user account. Requires email and password fields.

## DELETE /users/:id

Deletes a user by their unique identifier.
"#;

// 1. Initial full index
println!("--- Initial index ---");
let result = engine
.index(IndexContext::from_content(content_v1, DocumentFormat::Markdown))
.await?;

let doc_id = result.items[0].doc_id.clone();
if let Some(m) = &result.items[0].metrics {
println!("indexed in {}ms, {} nodes", m.total_time_ms(), m.nodes_processed);
}

// 2. Re-index unchanged content (incremental) — skips processing
println!("\n--- Re-index unchanged (incremental) ---");
let result = engine
.index(
IndexContext::from_content(content_v1, DocumentFormat::Markdown)
.with_mode(IndexMode::Incremental),
)
.await?;

for item in &result.items {
println!("doc_id: {} (unchanged, skipped)", item.doc_id);
}

// 3. Re-index with changes (incremental) — detects diff and updates
println!("\n--- Re-index with changes (incremental) ---");
let result = engine
.index(
IndexContext::from_content(content_v2, DocumentFormat::Markdown)
.with_mode(IndexMode::Incremental),
)
.await?;

for item in &result.items {
if let Some(m) = &item.metrics {
println!("updated in {}ms, {} nodes", m.total_time_ms(), m.nodes_processed);
}
}

println!("\ndoc_id: {doc_id}");

// Cleanup
for doc in engine.list().await? {
engine.remove(&doc.id).await?;
}

Ok(())
}
86 changes: 86 additions & 0 deletions rust/examples/index_single.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright (c) 2026 vectorless developers
// SPDX-License-Identifier: Apache-2.0

//! Single document indexing example — index one document from content.
//!
//! ```bash
//! cargo run --example index_single
//! ```

use vectorless::{DocumentFormat, EngineBuilder, IndexContext};

#[tokio::main]
async fn main() -> vectorless::Result<()> {
    // Stand up an engine pointed at this example's own workspace directory.
    let engine = EngineBuilder::new()
        .with_workspace("./workspace_single_example")
        .with_key("sk-or-v1-...")
        .with_model("google/gemini-3-flash-preview")
        .with_endpoint("http://localhost:4000/api/v1")
        .build()
        .await
        .map_err(|e| vectorless::Error::Config(e.to_string()))?;

    // The markdown document we index, supplied inline rather than from disk.
    let document = r#"# Project Overview

## Introduction

This document describes the architecture of a distributed system
designed for high-throughput data processing.

## Components

### API Gateway

Handles authentication, rate limiting, and request routing.
Supports both REST and gRPC protocols.

### Worker Pool

Processes tasks from the message queue. Each worker handles
one task at a time with configurable timeout.

## Performance

Under load testing, the system achieves 50k requests/second
with p99 latency under 200ms.

## Conclusion

The modular design allows independent scaling of each component.
"#;

    // Index from content string
    let outcome = engine
        .index(IndexContext::from_content(document, DocumentFormat::Markdown))
        .await?;

    // Print identity and (when present) the per-stage pipeline metrics for
    // each successfully indexed document.
    for entry in &outcome.items {
        println!("doc_id: {}", entry.doc_id);
        println!("name: {}", entry.name);
        println!("format: {:?}", entry.format);

        let Some(m) = &entry.metrics else { continue };
        println!(" metrics:");
        println!(" total time: {}ms", m.total_time_ms());
        println!(" parse: {}ms", m.parse_time_ms);
        println!(" build: {}ms", m.build_time_ms);
        println!(" enhance: {}ms", m.enhance_time_ms);
        println!(" enrich: {}ms", m.enrich_time_ms);
        println!(" optimize: {}ms", m.optimize_time_ms);
        println!(" reasoning: {}ms", m.reasoning_index_time_ms);
        println!(" nodes: {}", m.nodes_processed);
        println!(" summaries: {}", m.summaries_generated);
        println!(" llm calls: {}", m.llm_calls);
        println!(" tokens: {}", m.total_tokens_generated);
        println!(" topics: {}", m.topics_indexed);
        println!(" keywords: {}", m.keywords_indexed);
    }

    // Cleanup: drop every document left in the workspace.
    let docs = engine.list().await?;
    for doc in &docs {
        engine.remove(&doc.id).await?;
    }

    Ok(())
}
116 changes: 26 additions & 90 deletions rust/examples/indexing.rs
Original file line number Diff line number Diff line change
@@ -1,110 +1,46 @@
// Copyright (c) 2026 vectorless developers
// SPDX-License-Identifier: Apache-2.0

//! Index pipeline example for Vectorless.
//!
//! Demonstrates the full indexing flow: create engine → index document → inspect metrics.
//!
//! # Usage
//! Batch indexing example — index multiple documents at once.
//!
//! ```bash
//! cargo run --example indexing
//! ```

use vectorless::{EngineBuilder, IndexContext, IndexMode};
use vectorless::{EngineBuilder, IndexContext};

#[tokio::main]
async fn main() -> vectorless::Result<()> {
println!("=== Index Pipeline Example ===\n");

// 1. Create engine
let engine = EngineBuilder::new()
.with_workspace("./workspace_index_example")
.with_key("sk-...")
.with_model("gpt-4o")
.with_workspace("./workspace_batch_example")
.with_key("sk-or-v1-...")
.with_model("google/gemini-3-flash-preview")
.with_endpoint("http://localhost:4000/api/v1")
.build()
.await
.map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?;

println!("Engine created\n");

// 2. Index a single document with default options
println!("--- Single document (default mode) ---");
let result = engine.index(IndexContext::from_path("./README.md")).await?;

.map_err(|e| vectorless::Error::Config(e.to_string()))?;

// Index multiple files from different paths
let result = engine
.index(IndexContext::from_paths(&[
"../README.md",
"../CLAUDE.md",
"../LICENSE",
]))
.await?;

println!("indexed: {}, failed: {}", result.items.len(), result.failed.len());
for item in &result.items {
println!(" doc_id: {}", item.doc_id);
println!(" name: {}", item.name);
println!(" format: {:?}", item.format);

if let Some(ref metrics) = item.metrics {
println!(" metrics:");
println!(" total time: {}ms", metrics.total_time_ms());
println!(" parse: {}ms", metrics.parse_time_ms);
println!(" build: {}ms", metrics.build_time_ms);
println!(" enhance: {}ms", metrics.enhance_time_ms);
println!(" enrich: {}ms", metrics.enrich_time_ms);
println!(" optimize: {}ms", metrics.optimize_time_ms);
println!(" reasoning: {}ms", metrics.reasoning_index_time_ms);
println!(" nodes: {}", metrics.nodes_processed);
println!(" summaries: {}", metrics.summaries_generated);
println!(" llm calls: {}", metrics.llm_calls);
println!(" tokens: {}", metrics.total_tokens_generated);
println!(" topics: {}", metrics.topics_indexed);
println!(" keywords: {}", metrics.keywords_indexed);
}

// doc_id preserved across the loop for readability
let _doc_id = item.doc_id.clone();

// 3. Re-index with incremental mode — should detect no change
println!("\n--- Re-index (incremental, unchanged) ---");
let result2 = engine
.index(IndexContext::from_path("./README.md").with_mode(IndexMode::Incremental))
.await?;

for item in &result2.items {
println!(
" {} (metrics present: {})",
item.doc_id,
item.metrics.is_some()
);
}

// 4. Index multiple documents at once
println!("\n--- Batch indexing ---");
let batch = engine
.index(IndexContext::from_paths(&["./README.md", "./CLAUDE.md"]))
.await?;

println!(
" indexed: {}, failed: {}",
batch.items.len(),
batch.failed.len()
);
for item in &batch.items {
let time = item
.metrics
.as_ref()
.map(|m| m.total_time_ms())
.unwrap_or(0);
let nodes = item
.metrics
.as_ref()
.map(|m| m.nodes_processed)
.unwrap_or(0);
println!(" {} — {}ms, {} nodes", item.name, time, nodes);
}
println!(" {} — doc_id: {}", item.name, item.doc_id);
}
for fail in &result.failed {
println!(" FAILED: {} — {}", fail.source, fail.error);
}

// 5. Cleanup
println!("\n--- Cleanup ---");
let docs = engine.list().await?;
for doc in &docs {
engine.remove(&doc.id).await?;
}
println!(" removed {} document(s)", docs.len());
// Cleanup
for doc in engine.list().await? {
engine.remove(&doc.id).await?;
}

println!("\n=== Done ===");
Ok(())
}
10 changes: 6 additions & 4 deletions rust/src/client/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,9 @@ impl Engine {
.await;
if items.is_empty() && !failed.is_empty() {
return Err(Error::Config(format!(
"All {} source(s) failed to index",
failed.len()
"All {} source(s) failed to index: {}",
failed.len(),
failed.iter().map(|f| format!("{} ({})", f.source, f.error)).collect::<Vec<_>>().join("; ")
)));
}
if !items.is_empty() {
Expand Down Expand Up @@ -207,8 +208,9 @@ impl Engine {

if items.is_empty() && !failed.is_empty() {
return Err(Error::Config(format!(
"All {} source(s) failed to index",
failed.len()
"All {} source(s) failed to index: {}",
failed.len(),
failed.iter().map(|f| format!("{} ({})", f.source, f.error)).collect::<Vec<_>>().join("; ")
)));
}

Expand Down
15 changes: 13 additions & 2 deletions rust/src/client/indexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,13 +428,24 @@ impl IndexerClient {
let logic_fp = pipeline_options.logic_fingerprint();
meta = meta.with_logic_fingerprint(logic_fp);

let mut persisted =
PersistedDocument::new(meta, doc.tree.expect("IndexedDocument must have a tree"));
let tree = doc.tree.expect("IndexedDocument must have a tree");

// Extract stats from metrics
let node_count = tree.node_count();
let (summary_tokens, duration_ms) = if let Some(ref m) = doc.metrics {
(m.total_tokens_generated, m.total_time_ms())
} else {
(0, 0)
};

let mut persisted = PersistedDocument::new(meta, tree);

for page in doc.pages {
persisted.add_page(page.page, &page.content);
}

persisted.meta.update_processing_stats(node_count, summary_tokens, duration_ms);

persisted
}
}
Expand Down
Loading