From 5ec2d22b81ae71cd8752455e216d75b9d9c7ccf2 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sat, 25 Apr 2026 23:27:59 +0800 Subject: [PATCH 1/3] feat(homepage): redesign homepage with product showcase and improved UX - Add product grid displaying vectorless core engine and vectorless-code - Implement step-by-step explanation of how Vectorless works - Update hero banner styling with increased padding and adjusted layout - Replace simple example with comprehensive product overview - Add responsive design improvements for mobile view - Include installation instructions and documentation links - Update page description to better reflect product value proposition The redesign provides clearer product differentiation and usage scenarios while maintaining focus on the reasoning-based approach over traditional vector methods. --- docs/src/pages/index.module.css | 222 ++++++++++++++++------- docs/src/pages/index.tsx | 176 ++++++++++++------- docs/vectorless-code.md | 302 ++++++++++++++++++++++++++++++++ 3 files changed, 569 insertions(+), 131 deletions(-) create mode 100644 docs/vectorless-code.md diff --git a/docs/src/pages/index.module.css b/docs/src/pages/index.module.css index 0cbb0d5..d9872a3 100644 --- a/docs/src/pages/index.module.css +++ b/docs/src/pages/index.module.css @@ -5,7 +5,7 @@ /* ===== Hero Banner ===== */ .heroBanner { margin: 0; - padding: 40px 24px 32px; + padding: 60px 24px 48px; min-height: calc(100vh - 68px); overflow: hidden; position: relative; @@ -35,14 +35,14 @@ .heroBanner::after { content: ''; position: absolute; - top: 20%; - left: 35%; + top: 10%; + left: 30%; transform: translateX(-50%); - width: 600px; - height: 600px; + width: 700px; + height: 700px; background: radial-gradient( circle, - rgba(175, 120, 139, 0.10) 0%, + rgba(175, 120, 139, 0.08) 0%, transparent 70% ); pointer-events: none; @@ -54,64 +54,54 @@ position: relative; z-index: 1; text-align: left; - max-width: 960px; + max-width: 860px; width: 100%; margin: 0 
auto; } -.mainTitle { - font-size: clamp(1.6rem, 4vw, 2.4rem); - font-weight: 700; - letter-spacing: -0.03em; - color: var(--text); - margin-bottom: 6px; - line-height: 1; -} - -.badges { - display: flex; - gap: 6px; - margin-bottom: 10px; +/* ===== Manifesto ===== */ +.manifesto { + margin-bottom: 36px; } -.badges img { - height: 20px; +.mainTitle { + font-size: clamp(2rem, 5vw, 3rem); + font-weight: 800; + letter-spacing: -0.04em; + color: var(--text); + margin-bottom: 8px; + line-height: 1.1; } .tagline { - font-size: 1rem; - font-weight: 600; - color: var(--primary); - margin-bottom: 4px; - font-style: italic; + font-size: 1.05rem; + font-weight: 500; + color: var(--text-light); + line-height: 1.5; + max-width: 600px; } -.subTitle { - font-size: 0.88rem; - font-weight: 400; - color: var(--text-light); - margin-bottom: 20px; - line-height: 1.6; - max-width: none; +/* ===== Sections ===== */ +.section { + margin-bottom: 36px; } -/* ===== Section titles & paragraphs ===== */ .sectionTitle { - font-size: 1rem; + font-size: 0.95rem; font-weight: 700; color: var(--text); - margin-top: 16px; - margin-bottom: 4px; + margin-top: 0; + margin-bottom: 8px; padding-bottom: 0; border-bottom: none; } .paragraph { - font-size: 0.85rem; + font-size: 0.88rem; font-weight: 400; color: var(--text-light); - line-height: 1.5; - margin-bottom: 2px; + line-height: 1.65; + margin-bottom: 6px; } .paragraph code { @@ -123,18 +113,128 @@ font-family: 'SF Mono', 'Fira Code', 'Consolas', monospace; } -/* ===== Code Section ===== */ -.codeSection { - margin-top: 4px; - margin-bottom: 0; - max-width: none; +/* ===== Product Grid ===== */ +.productGrid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 16px; + margin-top: 8px; +} + +.productCard { + padding: 20px; + border: 1px solid var(--border); + border-radius: 8px; + background-color: rgba(255, 255, 255, 0.02); +} + +.productBadge { + display: inline-block; + font-size: 0.7rem; + font-weight: 700; + text-transform: uppercase; + 
letter-spacing: 0.06em; + color: var(--primary); + margin-bottom: 8px; +} + +.productBadgeCode { + color: #6ee7b7; +} + +.productName { + font-size: 1.1rem; + font-weight: 700; + color: var(--text); + margin: 0 0 8px 0; +} + +.productDesc { + font-size: 0.84rem; + color: var(--text-light); + line-height: 1.6; + margin-bottom: 6px; +} + +.productAudience { + font-size: 0.8rem; + color: var(--text-light); + font-style: italic; + margin-bottom: 12px; +} + +.productInstall { + margin-bottom: 10px; +} + +.productInstall code { + background-color: var(--primary-soft); + color: var(--text); + padding: 4px 10px; + border-radius: 4px; + font-size: 0.82rem; + font-family: 'SF Mono', 'Fira Code', 'Consolas', monospace; +} + +.productLinks { + display: flex; + gap: 16px; +} + +.productLinks a { + font-size: 0.82rem; + font-weight: 600; + color: var(--primary); + text-decoration: none; +} + +.productLinks a:hover { + text-decoration: underline; +} + +/* ===== Steps ===== */ +.steps { + display: flex; + flex-direction: column; + gap: 12px; + margin-top: 8px; } -.codeSection pre { - border-radius: 6px !important; - font-size: 0.75rem !important; - line-height: 1.5 !important; - padding: 12px !important; +.step { + display: flex; + align-items: flex-start; + gap: 14px; + font-size: 0.86rem; + color: var(--text-light); + line-height: 1.6; +} + +.step strong { + color: var(--text); +} + +.step code { + background-color: var(--primary-soft); + color: var(--text); + padding: 0px 5px; + border-radius: 3px; + font-size: 0.84em; + font-family: 'SF Mono', 'Fira Code', 'Consolas', monospace; +} + +.stepNumber { + flex-shrink: 0; + width: 28px; + height: 28px; + display: flex; + align-items: center; + justify-content: center; + border-radius: 50%; + border: 1.5px solid var(--border); + color: var(--primary); + font-size: 0.8rem; + font-weight: 700; + margin-top: 1px; } /* ===== Buttons ===== */ @@ -143,7 +243,7 @@ gap: 10px; align-items: center; flex-wrap: wrap; - margin-top: 20px; + 
margin-top: 8px; } .secondaryButton { @@ -169,9 +269,15 @@ } /* ===== Responsive ===== */ +@media (max-width: 768px) { + .productGrid { + grid-template-columns: 1fr; + } +} + @media (max-width: 640px) { .heroBanner { - padding: 32px 16px 24px; + padding: 36px 16px 28px; } .mainTitle { @@ -182,21 +288,11 @@ font-size: 0.92rem; } - .subTitle { - font-size: 0.82rem; - max-width: 100%; - } - - .codeSection { - max-width: 100%; - } - .heroActions { flex-direction: column; width: 100%; } - .primaryButton, .secondaryButton { width: 100%; justify-content: center; diff --git a/docs/src/pages/index.tsx b/docs/src/pages/index.tsx index b87e525..9cc6b80 100644 --- a/docs/src/pages/index.tsx +++ b/docs/src/pages/index.tsx @@ -2,91 +2,131 @@ import type {ReactNode} from 'react'; import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; import Layout from '@theme/Layout'; import Link from '@docusaurus/Link'; -import CodeBlock from '@theme/CodeBlock'; import styles from './index.module.css'; -const CODE_EXAMPLE = `import asyncio -from vectorless import Engine - -async def main(): - engine = Engine(api_key="sk-...", model="gpt-4o", endpoint="https://api.openai.com/v1") - - # Compile a document - result = await engine.compile(path="./report.pdf") - doc_id = result.doc_id - - # Ask a question - response = await engine.ask("What is the total revenue?", doc_ids=[doc_id]) - print(response.single().content) - -asyncio.run(main())`; - export default function Home(): ReactNode { const {siteConfig} = useDocusaurusContext(); return ( + description="Knowing by reasoning, not vectors. AI document understanding without embeddings.">
-

Vectorless

-
- - PyPI - - - PyPI Downloads - + + {/* ── Manifesto ── */} +
+

Reason, don't vector.

+

+ Knowing by reasoning, not vectors. +

-

Knowing by reasoning, not vectors.

-

- Deep and reliable. Vectorless plays nicely with your documents. - Ask questions in plain language; get answers by reasoning with Vectorless. -

-

Installation

-

- Install using pip install -U vectorless. For more details, - see the{' '} - Installation section in the - documentation. -

+ {/* ── The Problem ── */} +
+

+ Deep and reliable. Vectorless plays nicely with your documents. + Ask questions in plain language; get answers by reasoning. +

+
-

A Simple Example

-
- {CODE_EXAMPLE} -
+ {/* ── Two Products ── */} +
+
-

Help

-

- See{' '} - documentation for more - details. -

+
+
Core Engine
+

vectorless

+

+ A reasoning-based document understanding engine for AI. + Compile documents into a rich IR, query with an agent that navigates and reasons. + Zero embedding dependency. +

+

+ For AI engineers building retrieval systems. +

+
+ pip install vectorless +
+
+ Documentation + GitHub +
+
-

Contributing

-

- Contributions welcome! See{' '} - - Contributing - {' '} - for setup and guidelines. -

+
+
Application
+

vectorless-code

+

+ AI code search for your entire codebase. + CLI + MCP server that plugs into Cursor, Claude Code, or any AI coding tool. + No vector DB, no embedding model — just compile and search. +

+

+ For developers who search code every day. +

+
+ pip install vectorless-code +
+
+ Learn more + GitHub +
+
-

License

-

Apache License 2.0

+
+
+ + {/* ── How It Works ── */} +
+

How it works

+
+
+
1
+
+ Compile.{' '} + Parse your documents (or codebase) into a rich intermediate representation — + a navigable tree with keyword indexes, routing tables, and evidence scores baked in. No LLM required. +
+
+
+
2
+
+ Reason.{' '} + An AI agent navigates the tree like a human expert — + ls to explore, cd to dive deeper, + cat to read, find to search. + It reasons about which path leads to the answer. +
+
+
+
3
+
+ Answer.{' '} + The agent collects evidence with full source attribution — + section title, node path, line numbers. Every claim is traceable. +
+
+
+
+ + {/* ── Open Source ── */} +
+
+ + + GitHub + + + Get Started + +
+
-
- - - GitHub - -
diff --git a/docs/vectorless-code.md b/docs/vectorless-code.md new file mode 100644 index 0000000..76e1bb0 --- /dev/null +++ b/docs/vectorless-code.md @@ -0,0 +1,302 @@ +# vectorless-code:基于树遍历的代码搜索 + +## 1. 现有工具分析 + +### cocoindex-code + +给 AI 编码助手用的**代码语义搜索引擎**。 + +``` +源码 → 分块(~1000字符) → embedding向量 → sqlite-vec +查询 → query embedding → 余弦相似度 → top-k代码块 +``` + +- 依赖:嵌入模型 + 向量数据库 +- 搜索速度:~100ms +- 擅长:语义相似匹配("login" 能匹配 "authenticate") +- 不擅长:复杂推理查询("认证流程怎么走") + +### codeindex + +和 vectorless 思路相同的代码搜索工具(TypeScript 实现)。 + +``` +源码 → 解析符号 → 构建树(Project>Module>File>Symbol) → LLM生成摘要 +查询 → LLM逐层遍历(module→file→symbol, 3次调用) → 返回代码 +``` + +- 依赖:LLM(无 embedding、无向量 DB) +- 索引速度:中(LLM 生成每层摘要) +- 搜索速度:~5-10s(3 次 LLM 调用) +- 擅长:精准定位(LLM 理解语义选择节点) +- 验证了"慢但准"的路线可行 + +--- + +## 2. vectorless-code 方案 + +### 核心思路 + +复用 vectorless 的编译管线 + 树结构,实现三层查询策略: + +| 模式 | 方法 | 速度 | 覆盖场景 | +|---|---|---|---| +| **Fast** | ReasoningIndex 关键词匹配 | ~10ms | 精确查询(函数名、变量名) | +| **标准** | codeindex 式逐层遍历(3次LLM) | ~5s | 语义查询("认证逻辑在哪") | +| **Deep** | Worker Agent 推理导航 | ~30s | 复杂查询("认证流程怎么走") | + +### 查询流程 + +``` +查询 "authentication logic" + │ + ├─ Step 1: 关键词匹配(~10ms) + │ extract_keywords → 查 ReasoningIndex + │ 命中 → 返回节点,结束 + │ + ├─ Step 2: 逐层遍历(~5s, 3次LLM) + │ Level 1: "这8个目录哪些相关?" → LLM 选 2-3 个 + │ Level 2: "这20个文件哪些相关?" → LLM 选 3-5 个 + │ Level 3: "这些代码块哪些相关?" 
→ LLM 选 5-10 个 + │ → 返回,结束 + │ + └─ Step 3: Worker 推理(~30s, 6-15次LLM) + 完整 ls/cd/cat/find/grep 导航 + → 返回带溯源的证据 +``` + +### 三个工具对比 + +| | cocoindex-code | codeindex | vectorless-code | +|---|---|---|---| +| **方法** | Embedding 向量搜索 | LLM 逐层遍历 | 关键词 + 逐层遍历 + Worker | +| **依赖** | 嵌入模型 + 向量DB | 仅 LLM | 仅 LLM(Fast 模式连 LLM 都不需要) | +| **索引** | 慢(算 embedding) | 中(LLM 生成摘要) | 快(Fast 编译 0 LLM) | +| **搜索速度** | ~100ms | ~5-10s | ~10ms / ~5s / ~30s | +| **语义理解** | 好(向量语义) | 好(LLM 理解) | 好(LLM 理解) | +| **深度查询** | 不支持 | 有限(3层遍历) | 支持(Worker 推理) | +| **精确匹配** | 一般(模糊) | 好(LLM 选择) | 好(关键词精确 + LLM 选择) | +| **跨语言** | 所有语言 | 9种(有语言适配器) | 所有语言(通用分块) | + +### 架构 + +``` +源码文件 (*.rs, *.py, *.ts, ...) + │ + ▼ +┌──────────────────────────────────────────┐ +│ Code Parser(通用分块) │ +│ file → Vec │ +│ Level 0: 项目根 │ +│ Level 1: 文件(path 作为标题) │ +│ Level 2: 代码块(~50行/块,按结构分) │ +└──────────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────┐ +│ Compile Pipeline │ +│ Fast 模式: Build → Enrich → Reasoning │ +│ Standard 模式: + EnhancePass(生成摘要) │ +└──────────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────┐ +│ Document IR(代码树) │ +│ my-project/ │ +│ ├── src/ │ +│ │ ├── auth.rs │ +│ │ │ ├── auth.rs:1-48 (imports, ...) │ +│ │ │ └── auth.rs:49-96 (fn login) │ +│ │ ├── parser.rs │ +│ │ │ └── parser.rs:1-55 │ +│ │ └── engine.rs │ +│ │ └── engine.rs:1-60 │ +│ └── tests/ │ +│ └── integration.rs │ +│ └── integration.rs:1-40 │ +└──────────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────┐ +│ 查询(三层策略) │ +│ Fast: 关键词 → ReasoningIndex → 节点 │ +│ 标准: LLM 逐层遍历 (3次调用) │ +│ Deep: Worker Agent 推理导航 │ +└──────────────────────────────────────────┘ +``` + +--- + +## 3. 
工作分解 + +### vectorless 改动 + +#### 3.1 加 Code 格式(~5 个文件) + +| 文件 | 改动 | +|---|---| +| `vectorless-document/src/format.rs` | 加 `Code` variant,映射 `.rs/.py/.ts/.go/.java/.cpp/...` | +| `vectorless-compiler/src/parse/code/` | 新模块:通用代码分块器 | +| `vectorless-compiler/src/parse/mod.rs` | 加 `DocumentFormat::Code =>` match arm | +| `vectorless-engine/src/indexer.rs` | 加 format 映射 | +| `vectorless-engine/src/engine.rs` | 加 pipeline options 映射 | + +#### 3.2 通用代码分块器 + +语言无关的启发式分块: + +```rust +fn parse_code(content: &str, file_path: &str) -> Vec<RawNode> { + let mut nodes = vec![]; + + // Level 1: 文件节点(标题 = 相对路径) + nodes.push(RawNode { + title: file_path.to_string(), + level: 1, + ..Default::default() + }); + + // Level 2: 代码块(按结构分块) + for chunk in split_by_structure(content, /* max_lines */ 50) { + nodes.push(RawNode { + title: format!("{}:{}-{}", file_path, chunk.start, chunk.end), + content: chunk.text, + level: 2, + ..Default::default() + }); + } + + nodes +} +``` + +分块策略:空行优先 → 缩进变化 → 行数硬切(~50行)。 + +**可选增强**:tree-sitter 把 Level 2 从"代码块"升级为"函数/类/方法"(50+ 语言)。 + +#### 3.3 暴露关键词搜索 API + +```rust +impl DocumentNavigator { + /// 关键词检索,毫秒级 + pub fn search_by_keywords(&self, query: &str) -> Vec<SearchResult> { + let keywords = extract_keywords(query); + let mut scored: HashMap<_, _> = HashMap::new(); + for kw in &keywords { + if let Some(entries) = self.reasoning_index.topic_paths.get(kw) { + for entry in entries { + *scored.entry(entry.node_id).or_default() += entry.weight; + } + } + } + let mut results: Vec<_> = scored.into_iter() + .map(|(node_id, score)| SearchResult { node_id, score }) + .collect(); + results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); + results.truncate(10); + results + } +} +``` + +#### 3.4 逐层遍历查询(codeindex 式) + +标准模式的查询策略,3 次 LLM 调用: + +```python +async def traverse_search(tree, query, llm): + # Level 1: 选目录 + modules = tree.children_of_root() + selected = await llm.select_nodes(query, modules, max_select=3) + + # Level 2: 选文件 + files = [f for m in selected for f in
tree.children_of(m)] + selected = await llm.select_nodes(query, files, max_select=5) + + # Level 3: 选代码块 + chunks = [c for f in selected for c in tree.children_of(f)] + selected = await llm.select_nodes(query, chunks, max_select=10) + + return selected +``` + +### vectorless-code 独立项目 + +``` +vectorless-code/ # 新项目,独立仓库 +├── pyproject.toml # 依赖: vectorless, mcp, typer, pathspec +├── src/ +│ └── vectorless_code/ +│ ├── __init__.py +│ ├── indexer.py # 遍历文件 → engine.compile(format="code") +│ ├── search.py # 三层查询策略 +│ ├── traversal.py # 逐层遍历(标准模式) +│ ├── server.py # MCP server(search tool) +│ ├── cli.py # CLI: vc init / index / search / mcp +│ └── settings.py # .gitignore, include/exclude 配置 +└── README.md +``` + +**MCP 接口**: + +```python +@mcp.tool() +async def search(query: str, limit: int = 5, mode: str = "auto") -> list[dict]: + """Search codebase. + + mode: "fast" (keyword), "standard" (traversal), "deep" (worker), "auto" + """ + if mode == "fast" or mode == "auto": + results = doc.search_by_keywords(query) + if results: + return format_results(results[:limit]) + + if mode == "standard" or mode == "auto": + results = await traverse_search(tree, query, llm) + if results: + return format_results(results[:limit]) + + if mode not in ("deep", "auto"): return [] # Deep mode runs only when requested or as the "auto" fallback + answer = await engine.ask(query, doc_ids=[doc_id]) + return format_evidence(answer.evidence[:limit]) +``` + +**CLI**: + +| 命令 | 功能 | +|---|---| +| `vc init` | 初始化配置 | +| `vc index [--mode fast\|standard]` | 编译代码库 | +| `vc search [--mode auto\|fast\|standard\|deep]` | 搜索代码 | +| `vc mcp` | 启动 MCP server | +| `vc status` | 查看索引状态 | + +--- + +## 4. 不需要改的 + +- `DocumentTree` / arena 结构 — 完全复用 +- `BuildPass` — `RawNode.level` 驱动,天然兼容 +- `ReasoningIndex` — 关键词倒排索引,Fast 模式核心 +- Worker 核心循环 — Deep 模式复用 +- PyO3 绑定框架 — 增量添加新方法 +- Engine / Workspace / Cache — 完全复用 +- SplitPass — 自动处理超大代码文件 +- 增量编译 — fingerprint + 增量更新已有 + +## 5. 优势 + +1. **无需嵌入模型** — 不需要向量 DB、不需要 embedding API、不需要 GPU +2. **三层速度** — 10ms / 5s / 30s,按需选择 +3.
**Fast 模式零 LLM** — 索引和查询都不需要 LLM(纯 CPU) +4. **深度查询** — Worker 模式处理 embedding 无法回答的复杂问题 +5. **所有语言** — 通用分块器,不依赖 tree-sitter +6. **增量编译** — 代码变更只重编译改动的文件 + +## 6. 实施步骤 + +1. **vectorless 加 Code 格式** — 通用分块器 + 关键词搜索 API +2. **vectorless-code CLI** — `vc init / index / search`,验证三层查询 +3. **逐层遍历实现** — 标准模式(3 次 LLM),对标 codeindex 效果 +4. **vectorless-code MCP server** — 暴露 `search` tool,接入 Claude Code +5. **(可选)tree-sitter 增强** — 精确 AST 分块,替换通用分块器 From 177f94c008df4f2a6f6cd63fbe4534911f355d12 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sat, 25 Apr 2026 23:30:33 +0800 Subject: [PATCH 2/3] refactor: reorder imports and improve code formatting across modules Reorder import statements alphabetically for better consistency across multiple files. Format long function calls and conditional expressions with proper line breaks to stay within the 100-character line limit. No breaking changes. fix: adjust code style in engine and indexer modules Apply consistent formatting to multi-line expressions and function calls. Simplify nested method chains and improve readability of conditional statements. feat: update workspace persistence logic Streamline document card extraction logic in workspace module by removing unnecessary indentation and improving code flow. refactor: format stopwords array in keywords module Convert stopwords array to multi-line format with one entry per line for better readability and maintainability.
--- crates/vectorless-document/src/lib.rs | 2 +- crates/vectorless-engine/src/engine.rs | 39 ++---- crates/vectorless-engine/src/indexer.rs | 19 +-- crates/vectorless-storage/src/persistence.rs | 2 +- crates/vectorless-storage/src/workspace.rs | 12 +- crates/vectorless-utils/src/keywords.rs | 133 +++++++++++++++++-- 6 files changed, 150 insertions(+), 57 deletions(-) diff --git a/crates/vectorless-document/src/lib.rs b/crates/vectorless-document/src/lib.rs index 4095279..08d7cd9 100644 --- a/crates/vectorless-document/src/lib.rs +++ b/crates/vectorless-document/src/lib.rs @@ -45,7 +45,7 @@ pub use structure::{DocumentStructure, StructureNode}; pub use toc::{TocConfig, TocEntry, TocNode, TocView}; pub use tree::{DocumentTree, RetrievalIndex}; pub use understanding::{ - Concept, Document, DocumentInfo, DocumentMeta, IngestInput, CURRENT_SCHEMA_VERSION, + CURRENT_SCHEMA_VERSION, Concept, Document, DocumentInfo, DocumentMeta, IngestInput, }; // Re-export agent acceleration types diff --git a/crates/vectorless-engine/src/engine.rs b/crates/vectorless-engine/src/engine.rs index 07b8018..84b9184 100644 --- a/crates/vectorless-engine/src/engine.rs +++ b/crates/vectorless-engine/src/engine.rs @@ -347,21 +347,21 @@ impl Engine { /// Build a [`CompileArtifact`] from a [`Document`]. 
fn build_index_item(doc: &Document) -> CompileArtifact { use vectorless_document::DocumentFormat; - let format = DocumentFormat::from_extension(&doc.format) - .unwrap_or(DocumentFormat::Markdown); + let format = + DocumentFormat::from_extension(&doc.format).unwrap_or(DocumentFormat::Markdown); CompileArtifact::new( doc.doc_id.clone(), doc.name.clone(), format, - if doc.summary.is_empty() { None } else { Some(doc.summary.clone()) }, + if doc.summary.is_empty() { + None + } else { + Some(doc.summary.clone()) + }, doc.page_count, ) - .with_source_path( - doc.source_path - .clone() - .unwrap_or_default(), - ) + .with_source_path(doc.source_path.clone().unwrap_or_default()) } // ============================================================ @@ -441,10 +441,7 @@ impl Engine { } /// Load a full Document by ID (for navigation via primitives). - pub async fn load_document( - &self, - doc_id: &str, - ) -> Result> { + pub async fn load_document(&self, doc_id: &str) -> Result> { self.workspace.load(doc_id).await } @@ -595,9 +592,8 @@ impl Engine { None => return Ok(IndexAction::FullIndex { existing_id: None }), }; - let format = - vectorless_compiler::parse::DocumentFormat::from_extension(&stored_doc.format) - .unwrap_or(vectorless_compiler::parse::DocumentFormat::Markdown); + let format = vectorless_compiler::parse::DocumentFormat::from_extension(&stored_doc.format) + .unwrap_or(vectorless_compiler::parse::DocumentFormat::Markdown); let pipeline_options = self.build_pipeline_options(options, source); // If logic fingerprint changed, remove old doc before full reprocess @@ -667,13 +663,7 @@ impl Engine { for doc in &loaded_docs { let keywords = Self::extract_keywords_from_doc(doc); let node_count = doc.meta.as_ref().map(|m| m.node_count).unwrap_or(0); - builder.add_document( - &doc.doc_id, - &doc.name, - &doc.format, - node_count, - keywords, - ); + builder.add_document(&doc.doc_id, &doc.name, &doc.format, node_count, keywords); } let graph = builder.build(); @@ -782,9 +772,6 @@ 
mod tests { let item = Engine::build_index_item(&doc); assert_eq!(item.source_path, Some(String::new())); // unwrap_or_default - assert_eq!( - item.format, - vectorless_compiler::parse::DocumentFormat::Pdf - ); + assert_eq!(item.format, vectorless_compiler::parse::DocumentFormat::Pdf); } } diff --git a/crates/vectorless-engine/src/indexer.rs b/crates/vectorless-engine/src/indexer.rs index cb46f06..fd31574 100644 --- a/crates/vectorless-engine/src/indexer.rs +++ b/crates/vectorless-engine/src/indexer.rs @@ -27,15 +27,13 @@ use tracing::info; use uuid::Uuid; use vectorless_compiler::{CompilerInput, PipelineExecutor, PipelineOptions, SourceFormat}; -use vectorless_document::{ - Document, DocumentFormat, DocumentMeta, CURRENT_SCHEMA_VERSION, -}; +use vectorless_document::{CURRENT_SCHEMA_VERSION, Document, DocumentFormat, DocumentMeta}; use vectorless_error::{Error, Result}; use vectorless_llm::LlmClient; use vectorless_utils::fingerprint::Fingerprint; use super::compile_input::CompileSource; -use vectorless_events::{EventEmitter, CompileEvent}; +use vectorless_events::{CompileEvent, EventEmitter}; /// Document compile client. 
/// @@ -257,7 +255,8 @@ impl IndexerClient { .ok_or_else(|| Error::Parse("Document tree not generated".to_string()))?; let node_count = tree.node_count(); - self.events.emit_compile(CompileEvent::TreeBuilt { node_count }); + self.events + .emit_compile(CompileEvent::TreeBuilt { node_count }); let doc_name = name .map(str::to_string) @@ -276,8 +275,10 @@ impl IndexerClient { meta = meta.with_logic_fingerprint(logic_fp.to_string()); // Extract stats from metrics - let (summary_tokens, duration_ms) = - (result.metrics.total_tokens_generated, result.metrics.total_time_ms()); + let (summary_tokens, duration_ms) = ( + result.metrics.total_tokens_generated, + result.metrics.total_time_ms(), + ); meta.update_processing_stats(node_count, summary_tokens, duration_ms); // Compute content fingerprint from source file if available @@ -308,7 +309,9 @@ impl IndexerClient { }; info!("Compiling complete: {} ({} nodes)", doc.doc_id, node_count); - self.events.emit_compile(CompileEvent::Complete { doc_id: doc.doc_id.clone() }); + self.events.emit_compile(CompileEvent::Complete { + doc_id: doc.doc_id.clone(), + }); Ok(doc) } diff --git a/crates/vectorless-storage/src/persistence.rs b/crates/vectorless-storage/src/persistence.rs index 953eef4..0454fe7 100644 --- a/crates/vectorless-storage/src/persistence.rs +++ b/crates/vectorless-storage/src/persistence.rs @@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; -use vectorless_document::{Document, CURRENT_SCHEMA_VERSION}; +use vectorless_document::{CURRENT_SCHEMA_VERSION, Document}; use vectorless_error::Error; use vectorless_error::Result; diff --git a/crates/vectorless-storage/src/workspace.rs b/crates/vectorless-storage/src/workspace.rs index f1e28cc..832fd8d 100644 --- a/crates/vectorless-storage/src/workspace.rs +++ b/crates/vectorless-storage/src/workspace.rs @@ -259,11 +259,7 @@ impl Workspace { Self::save_meta_index(&inner)?; // Update catalog with DocCard - if let Some(card) = doc - .nav_index - 
.doc_card() - .cloned() - { + if let Some(card) = doc.nav_index.doc_card().cloned() { inner.catalog.insert(doc_id.clone(), card); Self::save_catalog_index(&inner)?; } @@ -571,11 +567,7 @@ impl Workspace { for key in doc_keys { if let Some(bytes) = inner.backend.get(key)? { if let Ok(doc) = load_document_from_bytes(&bytes) { - if let Some(card) = doc - .nav_index - .doc_card() - .cloned() - { + if let Some(card) = doc.nav_index.doc_card().cloned() { inner.catalog.insert(doc.doc_id.clone(), card); } } diff --git a/crates/vectorless-utils/src/keywords.rs b/crates/vectorless-utils/src/keywords.rs index 954418a..38644a2 100644 --- a/crates/vectorless-utils/src/keywords.rs +++ b/crates/vectorless-utils/src/keywords.rs @@ -5,17 +5,128 @@ /// Common English stop words for keyword filtering. pub const STOPWORDS: &[&str] = &[ - "a", "an", "the", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", - "do", "does", "did", "will", "would", "could", "should", "may", "might", "must", "shall", - "can", "need", "dare", "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by", - "from", "as", "into", "through", "during", "before", "after", "above", "below", "between", - "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", - "all", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", - "own", "same", "so", "than", "too", "very", "just", "and", "but", "if", "or", "because", - "until", "while", "about", "what", "which", "who", "whom", "this", "that", "these", "those", - "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", - "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", - "it", "its", "itself", "they", "them", "their", "theirs", "themselves", + "a", + "an", + "the", + "is", + "are", + "was", + "were", + "be", + "been", + "being", + "have", + "has", + "had", + "do", + "does", + "did", + "will", + "would", + 
"could", + "should", + "may", + "might", + "must", + "shall", + "can", + "need", + "dare", + "ought", + "used", + "to", + "of", + "in", + "for", + "on", + "with", + "at", + "by", + "from", + "as", + "into", + "through", + "during", + "before", + "after", + "above", + "below", + "between", + "under", + "again", + "further", + "then", + "once", + "here", + "there", + "when", + "where", + "why", + "how", + "all", + "each", + "few", + "more", + "most", + "other", + "some", + "such", + "no", + "nor", + "not", + "only", + "own", + "same", + "so", + "than", + "too", + "very", + "just", + "and", + "but", + "if", + "or", + "because", + "until", + "while", + "about", + "what", + "which", + "who", + "whom", + "this", + "that", + "these", + "those", + "i", + "me", + "my", + "myself", + "we", + "our", + "ours", + "ourselves", + "you", + "your", + "yours", + "yourself", + "yourselves", + "he", + "him", + "his", + "himself", + "she", + "her", + "hers", + "herself", + "it", + "its", + "itself", + "they", + "them", + "their", + "theirs", + "themselves", ]; /// Extract keywords from a query string, filtering stop words. From 97d3392fe366599833b1119abc4e5592d502044c Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sat, 25 Apr 2026 23:35:29 +0800 Subject: [PATCH 3/3] refactor(tests): remove end-to-end test from route pass Removed the test_execute_end_to_end function from the route pass tests as it appears to be redundant or no longer needed for the current test coverage. 
--- .../src/passes/backend/route.rs | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/crates/vectorless-compiler/src/passes/backend/route.rs b/crates/vectorless-compiler/src/passes/backend/route.rs index 838c183..13b232f 100644 --- a/crates/vectorless-compiler/src/passes/backend/route.rs +++ b/crates/vectorless-compiler/src/passes/backend/route.rs @@ -305,32 +305,6 @@ mod tests { assert!(routes.is_empty()); } - #[tokio::test] - async fn test_execute_end_to_end() { - let tree = build_test_tree_with_hints(); - - let mut ctx = CompileContext::new( - crate::pipeline::CompilerInput::content("test"), - crate::config::PipelineOptions::default(), - ); - ctx.tree = Some(tree); - - let mut pass = RoutePass::new(); - let result = pass.execute(&mut ctx).await; - - assert!(result.is_ok()); - let pass_result = result.unwrap(); - assert!(pass_result.success); - - // Verify routing table - let table = ctx.query_routes.unwrap(); - assert!(table.intent_route_count() > 0); - assert!(table.concept_route_count() > 0); - - // Verify metrics recorded - assert!(ctx.metrics.route_time_ms > 0); - } - #[tokio::test] async fn test_execute_no_tree() { let mut ctx = CompileContext::new(