Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "vectorless"
version = "0.1.12"
version = "0.1.13"
edition = "2024"
authors = ["zTgx <beautifularea@gmail.com>"]
description = "Hierarchical, reasoning-native document intelligence engine"
Expand Down Expand Up @@ -59,6 +59,16 @@ indextree = { version = "4.8.0", features = ["deser"] }
# LRU cache
lru = "0.12"

# Checksum
sha2 = "0.10"

# Compression
flate2 = "1.0"

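# File locking (Unix)
# NOTE(review): a TOML table header applies to every key below it until the
# next header, so the entries that follow this table (pdf-extract, lopdf, ...)
# become Unix-only dependencies. Move this table below the last plain
# [dependencies] entry.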
[target.'cfg(unix)'.dependencies]
libc = "0.2"

# PDF processing
pdf-extract = "0.10.0"
lopdf = "0.34"
Expand Down
18 changes: 2 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,22 +134,8 @@ async fn main() -> vectorless::Result<()> {

## Examples

See the [examples/](examples/) directory for complete working examples:

| Example | Description |
|---------|-------------|
| [basic.rs](examples/basic.rs) | Minimal ~30 line example showing core API |
| [index.rs](examples/index.rs) | Document indexing pipeline |
| [retrieve.rs](examples/retrieve.rs) | Retrieval pipeline with options |
| [events.rs](examples/events.rs) | Event-driven indexing with EventEmitter |
| [session.rs](examples/session.rs) | Session management with statistics |
| [batch_processing.rs](examples/batch_processing.rs) | Batch document processing |
| [content_aggregation.rs](examples/content_aggregation.rs) | Content aggregation strategies |
| [streaming.rs](examples/streaming.rs) | Streaming document processing |
| [multi_format.rs](examples/multi_format.rs) | Multi-format document support |
| [custom_pilot.rs](examples/custom_pilot.rs) | Custom pilot implementation |
| [cli_tool.rs](examples/cli_tool.rs) | CLI application example |
| [markdownflow.rs](examples/markdownflow.rs) | Markdown workflow example |
See the [examples/](examples/) directory for complete working examples.


## Architecture

Expand Down
6 changes: 3 additions & 3 deletions examples/content_aggregation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ use vectorless::retrieval::content::{
StructureBuilder, OutputFormat, RelevanceScorer, ScoringStrategyConfig,
ContentChunk, ScoringContext,
};
use vectorless::domain::NodeId;
use vectorless::document::NodeId;
use indextree::Arena;

fn make_node_id() -> NodeId {
let mut arena = Arena::new();
let node = vectorless::domain::TreeNode {
let node = vectorless::document::TreeNode {
title: "Test".to_string(),
structure: String::new(),
content: String::new(),
Expand Down Expand Up @@ -135,7 +135,7 @@ fn main() {

for (name, format) in formats {
let builder = StructureBuilder::new(format);
let tree = vectorless::domain::DocumentTree::new("Test", "");
let tree = vectorless::document::DocumentTree::new("Test", "");
let structured = builder.build(result.selected.clone(), &tree);

println!("\n{} Output ({} chars, {} tokens):", name, structured.content.len(), structured.metadata.total_tokens);
Expand Down
4 changes: 2 additions & 2 deletions examples/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ async fn main() -> vectorless::Result<()> {

/// Print tree structure up to a maximum depth.
fn print_tree_structure(
tree: &vectorless::domain::DocumentTree,
node_id: vectorless::domain::NodeId,
tree: &vectorless::document::DocumentTree,
node_id: vectorless::document::NodeId,
current_depth: usize,
max_depth: usize,
) {
Expand Down
2 changes: 1 addition & 1 deletion examples/retrieve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
//! ```

use std::sync::Arc;
use vectorless::domain::DocumentTree;
use vectorless::document::DocumentTree;
use vectorless::retrieval::{
PipelineRetriever, RetrieveOptions, Retriever, StrategyPreference,
pipeline::RetrievalOrchestrator,
Expand Down
95 changes: 95 additions & 0 deletions examples/storage_async.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright (c) 2026 vectorless developers
// SPDX-License-Identifier: Apache-2.0

//! Async workspace usage example.
//!
//! This example demonstrates async workspace operations:
//! - Creating an async workspace
//! - Concurrent document access
//! - Async LRU cache
//!
//! # Usage
//!
//! ```bash
//! cargo run --example storage_async
//! ```

use std::sync::Arc;

use vectorless::document::DocumentTree;
use vectorless::storage::{AsyncWorkspace, DocumentMeta, PersistedDocument};

/// Builds a sample document for the demo.
///
/// The metadata is created from `id`, `name` and the literal `"md"`
/// (presumably the document format). The tree is a `DocumentTree` built from
/// the title `"Root"` and the text `Content for <name>`.
fn create_doc(id: &str, name: &str) -> PersistedDocument {
    let meta = DocumentMeta::new(id, name, "md");
    let body = format!("Content for {}", name);
    PersistedDocument::new(meta, DocumentTree::new("Root", &body))
}

#[tokio::main]
async fn main() -> vectorless::Result<()> {
println!("=== Async Workspace Example ===\n");

let workspace_path = "./example_async_workspace";

// 1. Create async workspace
println!("1. Creating async workspace...");
let workspace = AsyncWorkspace::new(workspace_path).await?;
println!(" ✓ Created\n");

// 2. Add documents
println!("2. Adding documents...");
workspace.add(&create_doc("doc-1", "Document One")).await?;
workspace.add(&create_doc("doc-2", "Document Two")).await?;
workspace.add(&create_doc("doc-3", "Document Three")).await?;
println!(" ✓ Added 3 documents\n");

// 3. Concurrent access example
println!("3. Concurrent access from multiple tasks...");
let ws = Arc::new(workspace);

let mut handles = vec![];

// Spawn concurrent read tasks
for i in 1..=3 {
let ws_clone = ws.clone();
let handle = tokio::spawn(async move {
let id = format!("doc-{}", i);
let doc = ws_clone.load(&id).await.unwrap().unwrap();
println!(" [Task {}] Loaded: {}", i, doc.meta.name);
});
handles.push(handle);
}

// Wait for all tasks
for handle in handles {
handle.await.unwrap();
}
println!(" ✓ All concurrent loads completed\n");

// 4. Cache stats
println!("4. Cache statistics:");
let stats = ws.cache_stats().await;
println!(" - Hits: {}", stats.hits);
println!(" - Misses: {}", stats.misses);
println!();

// 5. Clone and share
println!("5. Workspace can be cloned cheaply (Arc internally)...");
let ws2 = ws.clone();
let ws3 = ws.clone();

let len1 = ws.len().await;
let len2 = ws2.len().await;
let len3 = ws3.len().await;

println!(" ws1.len() = {}, ws2.len() = {}, ws3.len() = {}", len1, len2, len3);
println!(" ✓ All clones share the same state\n");

// Cleanup
println!("Cleaning up...");
std::fs::remove_dir_all(workspace_path).ok();
println!(" ✓ Done!");

Ok(())
}
130 changes: 130 additions & 0 deletions examples/storage_backend.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright (c) 2026 vectorless developers
// SPDX-License-Identifier: Apache-2.0

//! Custom storage backend example.
//!
//! This example shows how to implement a custom StorageBackend.
//! Useful for integrating with databases, cloud storage, etc.
//!
//! # Usage
//!
//! ```bash
//! cargo run --example storage_backend
//! ```

use std::collections::HashMap;
use std::sync::{Arc, RwLock};

use vectorless::document::DocumentTree;
use vectorless::storage::{DocumentMeta, PersistedDocument, StorageBackend, Workspace};
use vectorless::Result;

/// A simple in-memory backend with logging.
///
/// This demonstrates how to implement the `StorageBackend` trait.
/// In production, you might implement S3, PostgreSQL, Redis, etc.
#[derive(Debug)]
struct LoggingMemoryBackend {
/// Label printed in each log line and returned by `backend_name()`.
name: &'static str,
/// Stored byte values keyed by string. The `RwLock` lets the `&self`
/// trait methods mutate the map.
data: RwLock<HashMap<String, Vec<u8>>>,
}

impl LoggingMemoryBackend {
fn new(name: &'static str) -> Self {
Self {
name,
data: RwLock::new(HashMap::new()),
}
}
}

/// `StorageBackend` implementation on top of the in-process map.
///
/// Lookups take the read lock and mutations take the write lock; a poisoned
/// lock panics. `get`, `put`, `delete` and `clear` print one line tagged with
/// the backend name, while the remaining methods are silent.
impl StorageBackend for LoggingMemoryBackend {
    /// Returns a copy of the bytes stored under `key`, or `None` if absent.
    fn get(&self, key: &str) -> Result<Option<Vec<u8>>> {
        let store = self.data.read().unwrap();
        let hit = store.get(key).cloned();
        let status = match &hit {
            Some(_) => "found",
            None => "not found",
        };
        println!(" [{}] GET '{}' -> {}", self.name, key, status);
        Ok(hit)
    }

    /// Stores a copy of `value` under `key`, replacing any previous entry.
    fn put(&self, key: &str, value: &[u8]) -> Result<()> {
        let mut store = self.data.write().unwrap();
        let size = value.len();
        store.insert(key.to_owned(), value.to_owned());
        println!(" [{}] PUT '{}' ({} bytes)", self.name, key, size);
        Ok(())
    }

    /// Removes `key` and reports whether an entry was actually present.
    fn delete(&self, key: &str) -> Result<bool> {
        let mut store = self.data.write().unwrap();
        let removed = store.remove(key).is_some();
        println!(" [{}] DELETE '{}' -> {}", self.name, key, removed);
        Ok(removed)
    }

    /// Reports whether an entry exists for `key`.
    fn exists(&self, key: &str) -> Result<bool> {
        Ok(self.data.read().unwrap().contains_key(key))
    }

    /// Returns every stored key, in the map's iteration order.
    fn keys(&self) -> Result<Vec<String>> {
        let store = self.data.read().unwrap();
        let names: Vec<String> = store.keys().cloned().collect();
        Ok(names)
    }

    /// Returns the number of stored entries.
    fn len(&self) -> Result<usize> {
        Ok(self.data.read().unwrap().len())
    }

    /// Removes every entry from the store.
    fn clear(&self) -> Result<()> {
        let mut store = self.data.write().unwrap();
        store.clear();
        println!(" [{}] CLEAR", self.name);
        Ok(())
    }

    /// Returns the label this backend was constructed with.
    fn backend_name(&self) -> &'static str {
        self.name
    }
}

/// Demo entry point: builds a `LoggingMemoryBackend`, creates a `Workspace`
/// on top of it, stores and reloads one document so the backend's log lines
/// are visible, then prints workspace statistics and integration tips.
///
/// Workspace errors are propagated with `?`; a missing document after the
/// load panics.
fn main() -> vectorless::Result<()> {
    println!("=== Custom Storage Backend Example ===\n");

    // Step 1: construct the logging backend behind an `Arc`.
    println!("1. Creating custom backend...");
    let backend = Arc::new(LoggingMemoryBackend::new("MyCustomBackend"));
    println!(" ✓ Backend: {}\n", backend.backend_name());

    // Step 2: hand the backend over to a workspace.
    println!("2. Creating workspace with custom backend...");
    let mut workspace = Workspace::with_backend(backend)?;
    println!(" ✓ Workspace created\n");

    // Step 3: store one document; the backend prints the calls it receives.
    println!("3. Adding document (observe backend calls):");
    let doc = PersistedDocument::new(
        DocumentMeta::new("custom-doc", "Custom Backend Test", "md"),
        DocumentTree::new("Root", "Testing custom backend!"),
    );
    workspace.add(&doc)?;
    println!();

    // Step 4: read the same document back by id.
    println!("4. Loading document:");
    let loaded = workspace.load("custom-doc")?.unwrap();
    println!(" ✓ Loaded: {}\n", loaded.meta.name);

    // Step 5: print the document count and cache size.
    println!("5. Workspace stats:");
    println!(" - Documents: {}", workspace.len());
    println!(" - Cache size: {}", workspace.cache_len());
    println!();

    println!("✓ Custom backend example complete!");
    println!("\nTip: Implement StorageBackend to integrate with:");
    for target in [
        " - S3 / GCS / Azure Blob",
        " - PostgreSQL / MySQL",
        " - Redis / Memcached",
        " - Any custom storage system",
    ] {
        println!("{}", target);
    }

    Ok(())
}
Loading
Loading