From d1b7b4149c41e72d56841f438f0a58bb0fe1c1ae Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 11 Apr 2026 21:37:21 +0800
Subject: [PATCH 1/3] feat(storage): change metadata filename from _meta.json to meta.bin

Rename the metadata index file from _meta.json to meta.bin in both the
documentation comments and the implementation: drop the underscore
prefix and use the .bin extension that matches the file's binary format.
---
 rust/src/storage/backend/file.rs | 2 +-
 rust/src/storage/workspace.rs    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rust/src/storage/backend/file.rs b/rust/src/storage/backend/file.rs
index 2933d806..ab461fe7 100644
--- a/rust/src/storage/backend/file.rs
+++ b/rust/src/storage/backend/file.rs
@@ -24,7 +24,7 @@ use crate::error::Result;
 /// workspace/
 /// ├── doc-1.bin          # Document 1
 /// ├── doc-2.bin          # Document 2
-/// ├── _meta.json         # Metadata index
+/// ├── meta.bin           # Metadata index
 /// └── .workspace.lock    # Lock file
 /// ```
 ///
diff --git a/rust/src/storage/workspace.rs b/rust/src/storage/workspace.rs
index 052e9b75..476df6fe 100644
--- a/rust/src/storage/workspace.rs
+++ b/rust/src/storage/workspace.rs
@@ -46,7 +46,7 @@ use super::persistence::{PersistedDocument, load_document_from_bytes, save_docum
 use crate::Error;
 use crate::error::Result;
 
-const META_KEY: &str = "_meta";
+const META_KEY: &str = "meta";
 const DEFAULT_CACHE_SIZE: usize = 100;
 
 /// Lightweight metadata entry for the async workspace index.

From d1680e63a1c17790525692bf62b83f2c6cb1c7e7 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 11 Apr 2026 21:41:35 +0800
Subject: [PATCH 2/3] test(storage): update metadata filename in persistence test

- Update the test's metadata filename from "_meta.json" to "meta.bin"
- The new name matches the binary format actually used for metadata
  storage
---
 rust/src/storage/persistence.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust/src/storage/persistence.rs b/rust/src/storage/persistence.rs
index a1c2d9e8..7dd8cbcc 100644
--- a/rust/src/storage/persistence.rs
+++ b/rust/src/storage/persistence.rs
@@ -760,7 +760,7 @@ mod tests {
     #[test]
     fn test_save_and_load_index() {
         let temp = TempDir::new().unwrap();
-        let path = temp.path().join("_meta.json");
+        let path = temp.path().join("meta.bin");
 
         let mut entries = Vec::new();
         entries.push(DocumentMeta::new("doc-1", "Doc 1", "md"));

From 07ebb59c1d5713e3a43e05720577e030a005dce8 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 11 Apr 2026 22:10:54 +0800
Subject: [PATCH 3/3] feat(engine): make api_key and model required parameters

BREAKING CHANGE: Removed environment variable auto-loading for the API
key and model. api_key and model must now be provided explicitly
through constructor parameters or a config file.
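A minimal before/after sketch of the Python constructor call (the key
and model values shown are placeholders):

    # Before: key was auto-loaded from OPENAI_API_KEY
    engine = Engine(workspace="./data")

    # After: credentials are passed explicitly
    engine = Engine(
        workspace="./data",
        api_key="sk-...",
        model="gpt-4o",
    )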
- Updated Python and Rust Engine constructors to require api_key and model parameters
- Removed automatic loading of OPENAI_API_KEY environment variable
- Removed configuration priority system for environment variables
- Updated README documentation to reflect new required parameters
- Added validation to ensure api_key and model are provided before engine initialization
---
 README.md                    |  16 ++--
 python/src/lib.rs            |  50 ++++------
 rust/src/client/builder.rs   | 173 +++++++++--------------------
 rust/src/config/loader.rs    |  81 +---------------
 rust/src/config/types/mod.rs |   2 +-
 rust/src/lib.rs              |   2 +
 6 files changed, 75 insertions(+), 249 deletions(-)

diff --git a/README.md b/README.md
index 0bfc7e60..cfd41a3a 100644
--- a/README.md
+++ b/README.md
@@ -24,19 +24,17 @@
 pip install vectorless
 ```
 
-### Set your API key
-
-```bash
-export OPENAI_API_KEY="sk-..."
-```
-
 ### Index and Query
 
 ```python
 from vectorless import Engine, IndexContext
 
-# Create engine with a workspace directory
-engine = Engine(workspace="./data")
+# Create engine — api_key and model are required
+engine = Engine(
+    workspace="./data",
+    api_key="sk-...",
+    model="gpt-4o",
+)
 
 # Index a document (PDF or Markdown)
 result = engine.index(IndexContext.from_file("./report.pdf"))
@@ -63,6 +61,8 @@ use vectorless::client::{EngineBuilder, IndexContext, QueryContext};
 
 async fn main() -> vectorless::Result<()> {
     let engine = EngineBuilder::new()
         .with_workspace("./data")
+        .with_key("sk-...")
+        .with_model("gpt-4o")
         .build()
         .await?;

diff --git a/python/src/lib.rs b/python/src/lib.rs
index f240c2c1..e385d4b6 100644
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -452,32 +452,29 @@ impl PyDocumentInfo {
 
 /// The main vectorless engine.
 ///
-/// Configuration priority (later overrides earlier):
-/// 1. Default configuration
-/// 2. Explicit config file (config_path parameter)
-/// 3. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)
-/// 4. Constructor parameters (api_key, model, endpoint) - highest priority
+/// `api_key` and `model` are **required**.
 ///
-/// # Zero Configuration (Recommended)
-///
-/// Just set OPENAI_API_KEY environment variable:
+/// # Example
 ///
 /// ```python
 /// from vectorless import Engine
 ///
-/// engine = Engine(workspace="./data")
-/// ```
-///
-/// # With Custom Model
-///
-/// ```python
-/// engine = Engine(workspace="./data", model="gpt-4o-mini")
+/// engine = Engine(
+///     workspace="./data",
+///     api_key="sk-...",
+///     model="gpt-4o",
+/// )
 /// ```
 ///
-/// # With Config File (Advanced)
+/// # With Custom Endpoint
 ///
 /// ```python
-/// engine = Engine(workspace="./data", config_path="./vectorless.toml")
+/// engine = Engine(
+///     workspace="./data",
+///     api_key="sk-...",
+///     model="deepseek-chat",
+///     endpoint="https://api.deepseek.com/v1",
+/// )
 /// ```
 #[pyclass(name = "Engine")]
 pub struct PyEngine {
@@ -492,18 +489,12 @@ impl PyEngine {
     /// Args:
     ///     workspace: Path to the workspace directory (optional if config_path provides it).
     ///     config_path: Path to configuration file (optional, advanced usage).
-    ///     api_key: Optional API key. If not provided, uses OPENAI_API_KEY env var.
-    ///     model: Optional model name. Default: "gpt-4o".
-    ///     endpoint: Optional API endpoint.
-    ///
-    /// Configuration priority (later overrides earlier):
-    /// 1. Default configuration
-    /// 2. config_path parameter (if provided)
-    /// 3. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)
-    /// 4. Constructor parameters (api_key, model, endpoint)
+    ///     api_key: **Required**. LLM API key.
+    ///     model: **Required**. LLM model name (e.g., "gpt-4o", "deepseek-chat").
+    ///     endpoint: Optional API endpoint (e.g., "https://api.deepseek.com/v1").
     ///
     /// Raises:
-    ///     VectorlessError: If engine creation fails.
+    ///     VectorlessError: If engine creation fails (missing api_key/model, workspace error, etc.).
     #[new]
     #[pyo3(signature = (workspace=None, config_path=None, api_key=None, model=None, endpoint=None))]
     fn new(
@@ -520,9 +511,6 @@
             ))
         })?;
 
-        // Resolve API key: explicit > env var
-        let resolved_api_key = api_key.or_else(|| std::env::var("OPENAI_API_KEY").ok());
-
         let engine = rt.block_on(async {
             let mut builder = EngineBuilder::new();
 
@@ -544,7 +532,7 @@
             if let Some(e) = endpoint {
                 builder = builder.with_endpoint(e);
             }
 
-            if let Some(key) = resolved_api_key {
+            if let Some(key) = api_key {
                 builder = builder.with_key(key);
             }

diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs
index 2dffe492..1bc9b927 100644
--- a/rust/src/client/builder.rs
+++ b/rust/src/client/builder.rs
@@ -6,43 +6,34 @@
 //! This module provides [`EngineBuilder`] for configuring and building
 //! [`Engine`] instances with sensible defaults.
 //!
-//! # Configuration Priority
+//! # Configuration
 //!
-//! Configuration is applied in this order (later overrides earlier):
-//! 1. Default configuration
-//! 2. Auto-detected config file (`vectorless.toml`, `config.toml`, `.vectorless.toml`)
-//! 3. Explicit config file (`with_config_path`)
-//! 4. Environment variables (`OPENAI_API_KEY`, `VECTORLESS_MODEL`, etc.)
-//! 5. Builder methods (`with_openai`, `with_model`, etc.) - highest priority
-//!
-//! # Environment Variables
+//! `api_key` and `model` are **required**. `endpoint` is optional
+//! (defaults to the model provider's standard endpoint).
 //!
-//! | Variable | Description |
-//! |----------|-------------|
-//! | `OPENAI_API_KEY` | LLM API key |
-//! | `VECTORLESS_MODEL` | Default model name |
-//! | `VECTORLESS_ENDPOINT` | API endpoint URL |
-//! | `VECTORLESS_WORKSPACE` | Workspace directory |
+//! Configuration sources (later overrides earlier):
+//! 1. Default configuration
+//! 2. Config file (via `with_config_path`)
+//! 3. Builder methods (`with_key`, `with_model`, etc.) — highest priority
 //!
 //! # Examples
 //!
-//! ## Zero Configuration (Recommended)
-//!
 //! ```rust,no_run
 //! use vectorless::client::EngineBuilder;
 //!
 //! # #[tokio::main]
 //! # async fn main() -> Result<(), vectorless::BuildError> {
-//! // Just set OPENAI_API_KEY environment variable
 //! let engine = EngineBuilder::new()
 //!     .with_workspace("./data")
+//!     .with_key("sk-...")
+//!     .with_model("gpt-4o")
 //!     .build()
 //!     .await?;
 //! # Ok(())
 //! # }
 //! ```
 //!
-//! ## With Custom Model
+//! ## With Custom Endpoint
 //!
 //! ```rust,no_run
 //! use vectorless::client::EngineBuilder;
 //!
@@ -51,22 +42,9 @@
 //! # #[tokio::main]
 //! # async fn main() -> Result<(), vectorless::BuildError> {
 //! let engine = EngineBuilder::new()
 //!     .with_workspace("./data")
-//!     .with_model("gpt-4o-mini", None) // Uses OPENAI_API_KEY from env
-//!     .build()
-//!     .await?;
-//! # Ok(())
-//! # }
-//! ```
-//!
-//! ## With Full Config File (Advanced)
-//!
-//! ```rust,no_run
-//! use vectorless::client::EngineBuilder;
-//!
-//! # #[tokio::main]
-//! # async fn main() -> Result<(), vectorless::BuildError> {
-//! let engine = EngineBuilder::new()
-//!     .with_config_path("./vectorless.toml")
+//!     .with_key("sk-...")
+//!     .with_model("deepseek-chat")
+//!     .with_endpoint("https://api.deepseek.com/v1")
 //!     .build()
 //!     .await?;
 //! # Ok(())
 //! # }
 //! ```
@@ -85,26 +63,8 @@ use super::events::EventEmitter;
 
 /// Builder for creating a [`Engine`] client.
 ///
-/// The builder uses sensible defaults and automatically loads
-/// configuration from config files and environment variables.
-///
-/// # Configuration Priority
-///
-/// Configuration is applied in this order (later overrides earlier):
-/// 1. Default configuration
-/// 2. Auto-detected config file (`vectorless.toml`, `config.toml`, `.vectorless.toml`)
-/// 3. Explicit config file (`with_config_path`)
-/// 4. Environment variables (`OPENAI_API_KEY`, `VECTORLESS_MODEL`, etc.)
-/// 5. Builder methods (`with_openai`, `with_model`, etc.) - highest priority
-///
-/// # Environment Variables
-///
-/// | Variable | Description |
-/// |----------|-------------|
-/// | `OPENAI_API_KEY` | LLM API key |
-/// | `VECTORLESS_MODEL` | Default model name |
-/// | `VECTORLESS_ENDPOINT` | API endpoint URL |
-/// | `VECTORLESS_WORKSPACE` | Workspace directory |
+/// `api_key` and `model` are required and must be set via builder methods
+/// or provided through a config file.
 ///
 /// # Example
 ///
 /// ```rust,no_run
 /// use vectorless::client::EngineBuilder;
 ///
@@ -113,9 +73,10 @@
 /// # #[tokio::main]
 /// # async fn main() -> Result<(), vectorless::BuildError> {
-/// // Zero configuration - just set OPENAI_API_KEY environment variable
 /// let client = EngineBuilder::new()
 ///     .with_workspace("./my_workspace")
+///     .with_key("sk-...")
+///     .with_model("gpt-4o")
 ///     .build()
 ///     .await?;
 /// # Ok(())
 /// # }
 /// ```
@@ -211,9 +172,7 @@
 
     /// Set the configuration file path.
     ///
-    /// If not set, the builder searches for `vectorless.toml`,
-    /// `config.toml`, or `.vectorless.toml` in the current directory
-    /// and parent directories.
+    /// The file must be a valid TOML configuration. No auto-detection is performed.
     #[must_use]
     pub fn with_config_path(mut self, path: impl Into<PathBuf>) -> Self {
         self.config_path = Some(path.into());
@@ -280,9 +239,7 @@
     // ============================================================
     // LLM Configuration
     // ============================================================
 
-    /// Set the LLM API key.
-    ///
-    /// If not set, reads from `OPENAI_API_KEY` environment variable.
+    /// Set the LLM API key. **Required**.
     ///
     /// # Example
     ///
@@ -399,67 +356,16 @@
         self
     }
 
-    /// Apply environment variable overrides to a Config.
-    ///
-    /// This is used when a custom Config is provided via `with_config`
-    /// or when using default config without a config file.
-    fn apply_env_overrides(config: &mut Config) {
-        // OPENAI_API_KEY: Set API key for all LLM clients
-        if let Ok(api_key) = std::env::var("OPENAI_API_KEY") {
-            // Set default API key
-            config.llm.api_key = Some(api_key.clone());
-            // Override individual client API keys if not explicitly set
-            if config.llm.summary.api_key.is_none() {
-                config.llm.summary.api_key = Some(api_key.clone());
-            }
-            if config.llm.retrieval.api_key.is_none() {
-                config.llm.retrieval.api_key = Some(api_key.clone());
-            }
-            if config.llm.pilot.api_key.is_none() {
-                config.llm.pilot.api_key = Some(api_key);
-            }
-            // Also set legacy config for backwards compatibility
-            if config.summary.api_key.is_none() {
-                if let Ok(api_key) = std::env::var("OPENAI_API_KEY") {
-                    config.summary.api_key = Some(api_key);
-                }
-            }
-        }
-
-        // VECTORLESS_MODEL: Set default model
-        if let Ok(model) = std::env::var("VECTORLESS_MODEL") {
-            config.llm.summary.model = model.clone();
-            config.llm.retrieval.model = model.clone();
-            config.llm.pilot.model = model.clone();
-            // Also set legacy config
-            config.summary.model = model.clone();
-            config.retrieval.model = model;
-        }
-
-        // VECTORLESS_ENDPOINT: Set API endpoint
-        if let Ok(endpoint) = std::env::var("VECTORLESS_ENDPOINT") {
-            config.llm.summary.endpoint = endpoint.clone();
-            config.llm.retrieval.endpoint = endpoint.clone();
-            config.llm.pilot.endpoint = endpoint.clone();
-            // Also set legacy config
-            config.summary.endpoint = endpoint.clone();
-            config.retrieval.endpoint = endpoint;
-        }
-
-        // VECTORLESS_WORKSPACE: Set workspace directory
-        if let Ok(workspace) = std::env::var("VECTORLESS_WORKSPACE") {
-            config.storage.workspace_dir = PathBuf::from(workspace);
-        }
-    }
-
     /// Build the Engine client.
     ///
+    /// `api_key` and `model` must be provided via builder methods or a config file.
+    ///
     /// # Errors
     ///
     /// Returns a [`BuildError`] if:
     /// - Configuration loading fails
     /// - Workspace creation fails
-    /// - Required API key is missing
+    /// - Required `api_key` or `model` is missing
     ///
     /// # Example
     ///
@@ -470,7 +376,8 @@
     /// # async fn main() -> Result<(), vectorless::BuildError> {
     /// let engine = EngineBuilder::new()
     ///     .with_workspace("./data")
-    ///     .with_key(std::env::var("OPENAI_API_KEY").unwrap())
+    ///     .with_key("sk-...")
+    ///     .with_model("gpt-4o")
     ///     .build()
     ///     .await?;
     /// # Ok(())
@@ -478,22 +385,15 @@
     /// # }
     /// ```
     pub async fn build(self) -> Result<Engine, BuildError> {
         // Load or create configuration
-        // ConfigLoader automatically applies environment variable overrides
         let mut config = if let Some(config) = self.config {
-            // Custom config - still apply env vars
-            let mut cfg = config;
-            Self::apply_env_overrides(&mut cfg);
-            cfg
+            config
         } else if let Some(path) = self.config_path {
             ConfigLoader::new()
                 .file(&path)
                 .load()
                 .map_err(|e| BuildError::Config(e.to_string()))?
         } else {
-            // No config file - use defaults with env var overrides
-            let mut cfg = Config::default();
-            Self::apply_env_overrides(&mut cfg);
-            cfg
+            Config::default()
         };
 
         // Apply builder overrides to retrieval config
@@ -534,6 +434,14 @@
             config.retrieval.search.max_iterations = 100;
         }
 
+        // Validate required settings
+        if config.summary.api_key.is_none() && config.retrieval.api_key.is_none() {
+            return Err(BuildError::MissingApiKey);
+        }
+        if config.retrieval.model.is_empty() {
+            return Err(BuildError::MissingModel);
+        }
+
         // Open workspace: prefer explicit path, fallback to config
         let workspace_path = self
             .workspace
@@ -563,8 +471,7 @@
         let mut retriever =
             PipelineRetriever::new().with_max_iterations(retrieval_config.search.max_iterations);
 
-        // LLM API key is REQUIRED for retrieval (Pilot needs it for semantic navigation)
-        // Try retrieval config first, then fall back to summary config
+        // Resolve API key: retrieval config first, then summary config
         let retrieval_api_key = retrieval_config
             .api_key
             .clone()
@@ -620,12 +527,14 @@ pub enum BuildError {
     #[error("Workspace error: {0}")]
     Workspace(String),
 
-    /// Missing API key for retrieval.
-    #[error(
-        "Missing API key: LLM API key is required for retrieval. Set OPENAI_API_KEY environment variable or configure retrieval.api_key"
-    )]
+    /// Missing API key.
+    #[error("Missing API key: call .with_key(\"sk-...\") or set api_key in config file")]
     MissingApiKey,
 
+    /// Missing model name.
+    #[error("Missing model: call .with_model(\"gpt-4o\") or set model in config file")]
+    MissingModel,
+
     /// Other error.
     #[error("{0}")]
     Other(String),

diff --git a/rust/src/config/loader.rs b/rust/src/config/loader.rs
index 4fad51eb..578d0aa0 100644
--- a/rust/src/config/loader.rs
+++ b/rust/src/config/loader.rs
@@ -3,33 +3,22 @@
 //! Configuration loader.
 //!
-//! Loads configuration from TOML files with environment variable overrides.
+//! Loads configuration from TOML files.
 //!
 //! # Configuration Priority
 //!
 //! Configuration is loaded in this order (later overrides earlier):
 //! 1. Default configuration
-//! 2. Config file (if found or specified)
-//! 3. Environment variables
-//!
-//! # Environment Variables
-//!
-//! | Variable | Description | Maps To |
-//! |----------|-------------|---------|
-//! | `OPENAI_API_KEY` | LLM API key | `llm.api_key` / `retrieval.api_key` |
-//! | `VECTORLESS_MODEL` | Default LLM model | `retrieval.model` |
-//! | `VECTORLESS_ENDPOINT` | LLM API endpoint | `retrieval.endpoint` |
-//! | `VECTORLESS_WORKSPACE` | Workspace directory | `storage.workspace_dir` |
+//! 2. Config file(s)
 //!
 //! # Example
 //!
 //! ```rust,no_run
 //! use vectorless::config::{ConfigLoader, Config};
 //!
-//! // Load from file with environment variable overrides
+//! // Load from file
 //! let config = ConfigLoader::new()
 //!     .file("config.toml")
-//!     .with_env(true) // Enable environment variables (default: true)
 //!     .load()?;
 //!
 //! // Load with validation
@@ -89,9 +78,6 @@ pub struct ConfigLoader {
 
     /// Custom validator (optional).
     validator: Option<ConfigValidator>,
-
-    /// Whether to apply environment variable overrides.
-    env_enabled: bool,
 }
 
 impl Default for ConfigLoader {
@@ -107,7 +93,6 @@ impl ConfigLoader {
             files: Vec::new(),
             validate: false,
             validator: None,
-            env_enabled: true,
         }
     }
 
@@ -142,68 +127,13 @@
         self
     }
 
-    /// Enable or disable environment variable overrides.
-    ///
-    /// When enabled (default), environment variables override config file values:
-    /// - `OPENAI_API_KEY` → sets API key for all LLM clients
-    /// - `VECTORLESS_MODEL` → sets default model
-    /// - `VECTORLESS_ENDPOINT` → sets API endpoint
-    /// - `VECTORLESS_WORKSPACE` → sets workspace directory
-    pub fn with_env(mut self, enabled: bool) -> Self {
-        self.env_enabled = enabled;
-        self
-    }
-
-    /// Apply environment variable overrides to configuration.
-    fn apply_env_overrides(&self, config: &mut Config) {
-        if !self.env_enabled {
-            return;
-        }
-
-        // OPENAI_API_KEY: Set API key for all LLM clients
-        if let Ok(api_key) = std::env::var("OPENAI_API_KEY") {
-            // Set default API key
-            config.llm.api_key = Some(api_key.clone());
-            // Override individual client API keys if not explicitly set
-            if config.llm.summary.api_key.is_none() {
-                config.llm.summary.api_key = Some(api_key.clone());
-            }
-            if config.llm.retrieval.api_key.is_none() {
-                config.llm.retrieval.api_key = Some(api_key.clone());
-            }
-            if config.llm.pilot.api_key.is_none() {
-                config.llm.pilot.api_key = Some(api_key);
-            }
-        }
-
-        // VECTORLESS_MODEL: Set default model
-        if let Ok(model) = std::env::var("VECTORLESS_MODEL") {
-            config.llm.summary.model = model.clone();
-            config.llm.retrieval.model = model.clone();
-            config.llm.pilot.model = model;
-        }
-
-        // VECTORLESS_ENDPOINT: Set API endpoint
-        if let Ok(endpoint) = std::env::var("VECTORLESS_ENDPOINT") {
-            config.llm.summary.endpoint = endpoint.clone();
-            config.llm.retrieval.endpoint = endpoint.clone();
-            config.llm.pilot.endpoint = endpoint;
-        }
-
-        // VECTORLESS_WORKSPACE: Set workspace directory
-        if let Ok(workspace) = std::env::var("VECTORLESS_WORKSPACE") {
-            config.storage.workspace_dir = PathBuf::from(workspace);
-        }
-    }
-
     /// Load the configuration.
     ///
     /// # Behavior
     ///
     /// 1. Start with default configuration
     /// 2. Load and merge each specified file (in order)
-    /// 3. Apply environment variable overrides (if enabled)
-    /// 4. Validate configuration (if enabled)
+    /// 3. Validate configuration (if enabled)
     ///
     /// # Errors
     ///
@@ -225,9 +155,6 @@
             }
         }
 
-        // Apply environment variable overrides
-        self.apply_env_overrides(&mut config);
-
         // Validate if requested
         if self.validate {
             let validator = self.validator.unwrap_or_default();

diff --git a/rust/src/config/types/mod.rs b/rust/src/config/types/mod.rs
index e6735072..ae89fbe4 100644
--- a/rust/src/config/types/mod.rs
+++ b/rust/src/config/types/mod.rs
@@ -4,7 +4,7 @@
 //! Configuration type definitions.
 //!
 //! All configuration values are defined inline in `Default` trait implementations.
-//! Configuration is loaded from TOML files only - no environment variable magic.
+//! Configuration is loaded from TOML files only — no environment variables, no auto-detection.
 
 mod concurrency;
 mod content;

diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index b34019c4..b128115f 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -28,6 +28,8 @@
 //! async fn main() -> Result<(), Box<dyn std::error::Error>> {
 //!     let client = EngineBuilder::new()
 //!         .with_workspace("./workspace")
+//!         .with_key("sk-...")
+//!         .with_model("gpt-4o")
 //!         .build()
 //!         .await?;
 //!