diff --git a/.gitignore b/.gitignore index a05dac13..9147f650 100644 --- a/.gitignore +++ b/.gitignore @@ -84,6 +84,3 @@ wheels/ .venv/ venv/ ENV/ - -# Test workspace -workspace* \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 30641940..86b89fb9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,11 +10,13 @@ members = [ "vectorless-core/vectorless-metrics", "vectorless-core/vectorless-llm", "vectorless-core/vectorless-storage", - "vectorless-core/vectorless-query", + # Strategy layer moved to Python — crates kept but not compiled: + # "vectorless-core/vectorless-query", + # "vectorless-core/vectorless-agent", + # "vectorless-core/vectorless-retrieval", "vectorless-core/vectorless-index", - "vectorless-core/vectorless-agent", - "vectorless-core/vectorless-retrieval", "vectorless-core/vectorless-rerank", + "vectorless-core/vectorless-primitives", "vectorless-core/vectorless-engine", "vectorless-core/vectorless-py", ] diff --git a/README.md b/README.md index b42dc245..ab01bdbd 100644 --- a/README.md +++ b/README.md @@ -1,126 +1,43 @@ -
-
-Knowing by reasoning, not vectors.
+Deep and reliable. Vectorless plays nicely with your documents. Ask questions in plain language; get answers by reasoning with Vectorless.
-### Four-Artifact Index Architecture +## Installation -When a document is indexed, the compile pipeline builds four artifacts: +Install using `pip install -U vectorless`. For more details, see the [Installation](https://vectorless.dev/docs/installation) section in the documentation. -``` -Content Layer Navigation Layer Reasoning Index Document Card -DocumentTree NavigationIndex ReasoningIndex DocCard -(TreeNode) (NavEntry, ChildRoute) (topic_paths, hot_nodes) (title, overview, - │ │ │ question hints) - │ │ │ │ - Agent reads Agent reads every Agent's targeted Orchestrator reads - only on cat decision round search tool (grep) for multi-doc routing -``` - -- **Content Layer** — The raw document tree. The agent only accesses this when reading specific paragraphs (`cat`). -- **Navigation Layer** — Each non-leaf node stores an overview, question hints, and child routes (title + description). The agent reads this every round to decide where to go next. -- **Reasoning Index** — Keyword-topic mappings with weights. Provides the agent's `grep` tool with structured keyword data for targeted search within a document. -- **DocCard** — A compact document-level summary. The Orchestrator reads DocCards to decide which documents to navigate in multi-document queries, without loading full documents. - -This separation means the agent makes routing decisions from lightweight metadata, not by scanning full content. - -### Agent-Based Understanding - -``` -Engine.query("What drove the revenue decline?") - │ - ├─ Query Understanding ── intent, concepts, strategy (LLM) - │ - ├─ Orchestrator ── analyzes query, dispatches Workers - │ │ - │ ├─ Worker 1 ── ls → cd "Financials" → ls → cd "Revenue" → cat - │ └─ Worker 2 ── ls → cd "Risk Factors" → grep "decline" → cat - │ │ - │ └─ evaluate ── insufficient? → replan → dispatch new paths → loop - │ - └─ Synthesis ── dedup, evidence scoring, reasoned answer with source chain -``` - -Worker navigation commands: - -| Command | Action | Reads | -|---------|--------|-------| -| `ls` | List child sections | Navigation Layer (ChildRoute) | -| `cd` | Enter a child section | Navigation Layer | -| `cat` | Read content at current node | Content Layer (DocumentTree) | -| `grep` | Search by keyword | Reasoning Index (topic_paths) | - -The Orchestrator evaluates Worker results after each round. If evidence is insufficient, it **replans** — adjusting strategy, dispatching new paths, or deepening exploration. This continues until enough evidence is collected. - -## Quick Start - -```bash -pip install vectorless -``` +## A Simple Example ```python import asyncio -from vectorless import Engine, IndexContext, QueryContext +from vectorless import Engine async def main(): engine = Engine(api_key="sk-...", model="gpt-4o", endpoint="https://api.openai.com/v1") - # Index a document - result = await engine.index(IndexContext.from_path("./report.pdf")) + # Compile a document + result = await engine.compile(path="./report.pdf") doc_id = result.doc_id - # Query - result = await engine.query( - QueryContext("What is the total revenue?").with_doc_ids([doc_id]) - ) - print(result.single().content) + # Ask a question + response = await engine.ask("What is the total revenue?", doc_ids=[doc_id]) + print(response.single().content) asyncio.run(main()) ``` -## Resources +## Help -- [Documentation](https://vectorless.dev) — Guides, architecture, API reference -- [Rust API Docs](https://docs.rs/vectorless) — Auto-generated crate documentation -- [PyPI](https://pypi.org/project/vectorless/) — Python package -- [Crates.io](https://crates.io/crates/vectorless) — Rust crate -- [Examples](examples/) — Complete usage patterns for Python and Rust +See [documentation](https://vectorless.dev/docs/getting-started) for more details. -## Contributing -Contributions welcome! If you find this useful, please ⭐ the repo — it helps others discover it. - -## Star History +## Contributing - -Document Understanding Engine for AI
- -Knowing by reasoning, not vectors.
++ Deep and reliable. Vectorless plays nicely with your documents. + Ask questions in plain language; get answers by reasoning with Vectorless. +