From 2d145ba7c7c36f7a78f579c5dec6ac4ef318ac77 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Mon, 13 Apr 2026 00:08:13 +0800 Subject: [PATCH 1/2] feat: add QueryContext and update indexing APIs - Add QueryContext class for structured query operations with methods for setting document scope (single, multiple, workspace), max tokens, reasoning chain inclusion, and depth limits - Rename IndexContext.from_file() to from_path() and from_files() to from_paths() for consistency - Update IndexContext.from_content() and from_bytes() to accept format as second parameter instead of name - Add with_name() method to IndexContext for setting document names - Change IndexOptions parameters from 'summaries' and 'description' to 'generate_summaries' and 'generate_description' with updated defaults - Add new IndexOptions parameters: include_text and generate_ids - Update engine.query() to accept QueryContext instead of doc_id and question parameters - Export additional classes in __init__.py including QueryContext, IndexOptions, and graph-related types --- examples/indexing/main.py | 18 ++-- python/src/lib.rs | 192 ++++++++++++++++++++++++---------- python/vectorless/__init__.py | 37 +++++-- 3 files changed, 175 insertions(+), 72 deletions(-) diff --git a/examples/indexing/main.py b/examples/indexing/main.py index 7aa70b3b..fd507fdd 100644 --- a/examples/indexing/main.py +++ b/examples/indexing/main.py @@ -8,7 +8,7 @@ import asyncio import os -from vectorless import Engine, IndexContext, IndexOptions +from vectorless import Engine, IndexContext, IndexOptions, QueryContext # os is used only for removing the sample file @@ -45,8 +45,8 @@ async def main(): - **LLM Navigation**: Queries are resolved by traversing the tree. - **No Vectors**: No embeddings, no similarity search, no vector DB. """, - name="architecture", - ) + "markdown", + ).with_name("architecture") ) doc_id = result.doc_id print(f" Indexed: {doc_id}") @@ -74,7 +74,7 @@ async def main(): Projected Q1 revenue is $13.5M based on current pipeline. """) - result = await engine.index(IndexContext.from_file(sample_path)) + result = await engine.index(IndexContext.from_path(sample_path)) file_doc_id = result.doc_id print(f" Indexed: {file_doc_id}\n") os.remove(sample_path) @@ -84,14 +84,18 @@ async def main(): result = await engine.index( IndexContext.from_content( "# API Reference\n\n## GET /users\n\nList all users.\n\n## POST /users\n\nCreate a user.", - name="api_ref", - ).with_options(IndexOptions(summaries=True, description=True)), + "markdown", + ) + .with_name("api_ref") + .with_options(IndexOptions(generate_summaries=True, generate_description=True)), ) print(f" Indexed: {result.doc_id}\n") # --- 5. Query --- print("--- Query ---") - answer = await engine.query(file_doc_id, "What was the total revenue?") + answer = await engine.query( + QueryContext("What was the total revenue?").with_doc_id(file_doc_id) + ) item = answer.single() if item: print(f" Score: {item.score:.2f}") diff --git a/python/src/lib.rs b/python/src/lib.rs index c8eb9da2..d4e2f972 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -101,8 +101,10 @@ fn parse_format(format: &str) -> PyResult { /// /// Args: /// mode: Indexing mode - "default", "force", or "incremental". -/// summaries: Whether to generate summaries. Default: False. -/// description: Whether to generate document description. Default: False. +/// generate_summaries: Whether to generate summaries. Default: True. +/// generate_description: Whether to generate document description. Default: False. +/// include_text: Whether to include node text in the tree. Default: True. +/// generate_ids: Whether to generate node IDs. Default: True. #[pyclass(name = "IndexOptions", skip_from_py_object)] #[derive(Clone)] pub struct PyIndexOptions { @@ -112,8 +114,14 @@ pub struct PyIndexOptions { #[pymethods] impl PyIndexOptions { #[new] - #[pyo3(signature = (mode="default", summaries=false, description=false))] - fn new(mode: &str, summaries: bool, description: bool) -> PyResult { + #[pyo3(signature = (mode="default", generate_summaries=true, generate_description=false, include_text=true, generate_ids=true))] + fn new( + mode: &str, + generate_summaries: bool, + generate_description: bool, + include_text: bool, + generate_ids: bool, + ) -> PyResult { let mut opts = IndexOptions::new(); match mode { "default" => {} @@ -126,17 +134,26 @@ impl PyIndexOptions { ))) } } - if summaries { - opts = opts.with_summaries(); - } - if description { - opts = opts.with_description(); - } + opts.generate_summaries = generate_summaries; + opts.generate_description = generate_description; + opts.include_text = include_text; + opts.generate_ids = generate_ids; Ok(Self { inner: opts }) } fn __repr__(&self) -> String { - "IndexOptions(...)".to_string() + format!( + "IndexOptions(mode='{}', generate_summaries={}, generate_description={}, include_text={}, generate_ids={})", + match self.inner.mode { + IndexMode::Default => "default", + IndexMode::Force => "force", + IndexMode::Incremental => "incremental", + }, + self.inner.generate_summaries, + self.inner.generate_description, + self.inner.include_text, + self.inner.generate_ids, + ) } } @@ -152,19 +169,19 @@ impl PyIndexOptions { /// from vectorless import IndexContext /// /// # Single file -/// ctx = IndexContext.from_file("./document.pdf") +/// ctx = IndexContext.from_path("./document.pdf") /// /// # Multiple files -/// ctx = IndexContext.from_files(["./a.pdf", "./b.md"]) +/// ctx = IndexContext.from_paths(["./a.pdf", "./b.md"]) /// /// # Directory /// ctx = IndexContext.from_dir("./docs/") /// /// # From text -/// ctx = IndexContext.from_text("# Title\\nContent...", name="doc") +/// ctx = IndexContext.from_content("# Title\\nContent...", "markdown").with_name("doc") /// /// # From bytes -/// ctx = IndexContext.from_bytes(data, name="doc", format="pdf") +/// ctx = IndexContext.from_bytes(data, "pdf").with_name("doc") /// ``` #[pyclass(name = "IndexContext")] pub struct PyIndexContext { @@ -175,18 +192,15 @@ pub struct PyIndexContext { impl PyIndexContext { /// Create an IndexContext from a single file path. #[staticmethod] - #[pyo3(signature = (path, name=None))] - fn from_file(path: String, name: Option) -> Self { - let mut ctx = IndexContext::from_path(&path); - if let Some(n) = name { - ctx = ctx.with_name(&n); + fn from_path(path: String) -> Self { + Self { + inner: IndexContext::from_path(&path), } - Self { inner: ctx } } /// Create an IndexContext from multiple file paths. #[staticmethod] - fn from_files(paths: Vec) -> Self { + fn from_paths(paths: Vec) -> Self { Self { inner: IndexContext::from_paths(&paths), } @@ -202,29 +216,30 @@ impl PyIndexContext { /// Create an IndexContext from text content. #[staticmethod] - #[pyo3(signature = (content, name=None, format="markdown"))] - fn from_content(content: String, name: Option, format: &str) -> PyResult { + #[pyo3(signature = (content, format="markdown"))] + fn from_content(content: String, format: &str) -> PyResult { let doc_format = parse_format(format)?; - let mut ctx = IndexContext::from_content(&content, doc_format); - if let Some(n) = name { - ctx = ctx.with_name(&n); - } + let ctx = IndexContext::from_content(&content, doc_format); Ok(Self { inner: ctx }) } /// Create an IndexContext from binary data. #[staticmethod] - #[pyo3(signature = (data, name, format))] - fn from_bytes(data: Vec, name: String, format: &str) -> PyResult { + fn from_bytes(data: Vec, format: &str) -> PyResult { let doc_format = parse_format(format)?; - let ctx = IndexContext::from_bytes(data, doc_format).with_name(&name); + let ctx = IndexContext::from_bytes(data, doc_format); Ok(Self { inner: ctx }) } + /// Set the document name (single-source only). + fn with_name(&self, name: String) -> Self { + let ctx = self.inner.clone().with_name(&name); + Self { inner: ctx } + } + /// Apply indexing options. fn with_options(&self, options: &PyIndexOptions) -> Self { - let mut ctx = self.inner.clone(); - ctx = ctx.with_options(options.inner.clone()); + let ctx = self.inner.clone().with_options(options.inner.clone()); Self { inner: ctx } } @@ -250,6 +265,80 @@ impl PyIndexContext { } } +// ============================================================ +// QueryContext +// ============================================================ + +/// Context for a query operation. +/// +/// ```python +/// from vectorless import QueryContext +/// +/// # Query a single document +/// ctx = QueryContext("What is the total revenue?").with_doc_id(doc_id) +/// +/// # Query multiple documents +/// ctx = QueryContext("What is the architecture?").with_doc_ids(["doc-1", "doc-2"]) +/// +/// # Query entire workspace +/// ctx = QueryContext("Explain the algorithm") +/// ``` +#[pyclass(name = "QueryContext")] +pub struct PyQueryContext { + inner: QueryContext, +} + +#[pymethods] +impl PyQueryContext { + /// Create a new query context (defaults to workspace scope). + #[new] + fn new(query: String) -> Self { + Self { + inner: QueryContext::new(&query), + } + } + + /// Set scope to a single document. + fn with_doc_id(&self, doc_id: String) -> Self { + let ctx = self.inner.clone().with_doc_id(&doc_id); + Self { inner: ctx } + } + + /// Set scope to multiple documents. + fn with_doc_ids(&self, doc_ids: Vec) -> Self { + let ctx = self.inner.clone().with_doc_ids(doc_ids); + Self { inner: ctx } + } + + /// Set scope to entire workspace. + fn with_workspace(&self) -> Self { + let ctx = self.inner.clone().with_workspace(); + Self { inner: ctx } + } + + /// Set the maximum tokens for the result content. + fn with_max_tokens(&self, tokens: usize) -> Self { + let ctx = self.inner.clone().with_max_tokens(tokens); + Self { inner: ctx } + } + + /// Set whether to include the reasoning chain. + fn with_include_reasoning(&self, include: bool) -> Self { + let ctx = self.inner.clone().with_include_reasoning(include); + Self { inner: ctx } + } + + /// Set the maximum tree traversal depth. + fn with_depth_limit(&self, depth: usize) -> Self { + let ctx = self.inner.clone().with_depth_limit(depth); + Self { inner: ctx } + } + + fn __repr__(&self) -> String { + "QueryContext(...)".to_string() + } +} + // ============================================================ // QueryResultItem // ============================================================ @@ -798,7 +887,7 @@ async fn run_get_graph(engine: Arc) -> PyResult> /// `api_key` and `model` are **required**. /// /// ```python -/// from vectorless import Engine, IndexContext +/// from vectorless import Engine, IndexContext, QueryContext /// /// engine = Engine( /// workspace="./data", @@ -807,11 +896,11 @@ async fn run_get_graph(engine: Arc) -> PyResult> /// ) /// /// # Index -/// result = await engine.index(IndexContext.from_file("./report.pdf")) +/// result = await engine.index(IndexContext.from_path("./report.pdf")) /// doc_id = result.doc_id /// /// # Query -/// answer = await engine.query(doc_id, "What is the revenue?") +/// answer = await engine.query(QueryContext("What is the revenue?").with_doc_id(doc_id)) /// print(answer.single().content) /// ``` #[pyclass(name = "Engine")] @@ -885,7 +974,7 @@ impl PyEngine { /// Index a document. /// /// Args: - /// ctx: IndexContext created from from_file, from_files, from_dir, etc. + /// ctx: IndexContext created from from_path, from_paths, from_dir, etc. /// /// Returns: /// IndexResult with doc_id and items. @@ -901,35 +990,21 @@ impl PyEngine { /// Query indexed documents. /// /// Args: - /// doc_id: Document ID (or list of IDs) returned from index(). - /// question: The question to ask. + /// ctx: QueryContext with query text and scope. /// /// Returns: /// QueryResult with answer and score. /// /// Raises: /// VectorlessError: If query fails. - #[pyo3(signature = (doc_id, question))] fn query<'py>( &self, py: Python<'py>, - doc_id: &Bound<'_, PyAny>, - question: String, + ctx: &PyQueryContext, ) -> PyResult> { let engine = Arc::clone(&self.inner); - - let ctx = if let Ok(single) = doc_id.extract::() { - QueryContext::new(&question).with_doc_id(&single) - } else if let Ok(multi) = doc_id.extract::>() { - QueryContext::new(&question).with_doc_ids(multi) - } else { - return Err(PyErr::from(VectorlessError::new( - "doc_id must be a string or list of strings".to_string(), - "config", - ))); - }; - - future_into_py(py, run_query(engine, ctx)) + let query_ctx = ctx.inner.clone(); + future_into_py(py, run_query(engine, query_ctx)) } /// List all indexed documents. @@ -986,11 +1061,11 @@ impl PyEngine { /// Vectorless - Reasoning-native document intelligence engine. /// /// ```python -/// from vectorless import Engine, IndexContext +/// from vectorless import Engine, IndexContext, QueryContext /// /// engine = Engine(workspace="./data", api_key="sk-...", model="gpt-4o") -/// result = await engine.index(IndexContext.from_file("./report.pdf")) -/// answer = await engine.query(result.doc_id, "What is the revenue?") +/// result = await engine.index(IndexContext.from_path("./report.pdf")) +/// answer = await engine.query(QueryContext("What is the revenue?").with_doc_id(result.doc_id)) /// print(answer.single().content) /// ``` #[pymodule] @@ -998,6 +1073,7 @@ fn vectorless(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/python/vectorless/__init__.py b/python/vectorless/__init__.py index 8b995746..4d66e2ca 100644 --- a/python/vectorless/__init__.py +++ b/python/vectorless/__init__.py @@ -5,25 +5,37 @@ instead of vector databases for accurate, explainable retrieval. Quick Start: - from vectorless import Engine, IndexContext + from vectorless import Engine, IndexContext, QueryContext # Create engine - engine = Engine(workspace="./data") + engine = Engine(workspace="./data", api_key="sk-...", model="gpt-4o") # Index a document - ctx = IndexContext.from_file("./report.pdf") - doc_id = engine.index(ctx) + ctx = IndexContext.from_path("./report.pdf") + result = await engine.index(ctx) + doc_id = result.doc_id # Query - result = engine.query(doc_id, "What is the revenue?") - print(result.content) + answer = await engine.query(QueryContext("What is the revenue?").with_doc_id(doc_id)) + print(answer.single().content) """ -from vectorless.vectorless import ( +from vectorless._vectorless import ( Engine, IndexContext, + IndexOptions, + IndexResult, + IndexItem, + QueryContext, QueryResult, + QueryResultItem, DocumentInfo, + DocumentGraph, + DocumentGraphNode, + GraphEdge, + EdgeEvidence, + WeightedKeyword, + FailedItem, VectorlessError, __version__, ) @@ -31,8 +43,19 @@ __all__ = [ "Engine", "IndexContext", + "IndexOptions", + "IndexResult", + "IndexItem", + "QueryContext", "QueryResult", + "QueryResultItem", "DocumentInfo", + "DocumentGraph", + "DocumentGraphNode", + "GraphEdge", + "EdgeEvidence", + "WeightedKeyword", + "FailedItem", "VectorlessError", "__version__", ] From eab706de472b1fad4cd06808d198f94f9380142e Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Mon, 13 Apr 2026 00:09:43 +0800 Subject: [PATCH 2/2] chore(release): bump version from 0.1.1 to 0.1.2 Update project version in pyproject.toml to prepare for new release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bee82a1f..99ff1191 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "vectorless" -version = "0.1.1" +version = "0.1.2" description = "Hierarchical document intelligence without vectors" readme = "README.md" requires-python = ">=3.9"