From 95c256a4c4e53a0cb44afa3aab22bc50dc328ad0 Mon Sep 17 00:00:00 2001 From: rUv Date: Wed, 25 Feb 2026 13:32:21 +0000 Subject: [PATCH] fix(security): path traversal in MCP server vector_db_backup (CWE-22) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add path validation to all MCP tools that accept user-supplied file paths, preventing arbitrary file read/write via directory traversal. Vulnerable functions patched: - tool_backup: db_path and backup_path now validated - tool_create_db: params.path now validated - get_or_open_db: path now validated Implementation: - validate_path() canonicalizes paths and checks they resolve within the configured data_dir (defaults to cwd) - Configurable via mcp.data_dir in config or RUVECTOR_MCP_DATA_DIR env - Rejects absolute paths outside data_dir, ../traversal, and symlink escapes - 8 unit tests covering all POC attack vectors from the report CVSS 3.1: 9.1 (Critical) → Mitigated Closes #207 Co-Authored-By: claude-flow --- crates/ruvector-cli/src/config.rs | 17 ++ crates/ruvector-cli/src/mcp/handlers.rs | 219 +++++++++++++++++++++++- 2 files changed, 228 insertions(+), 8 deletions(-) diff --git a/crates/ruvector-cli/src/config.rs b/crates/ruvector-cli/src/config.rs index b2782e932..d3b705268 100644 --- a/crates/ruvector-cli/src/config.rs +++ b/crates/ruvector-cli/src/config.rs @@ -75,6 +75,12 @@ pub struct McpConfig { /// Enable CORS #[serde(default = "default_true")] pub cors: bool, + + /// Allowed data directory for MCP file operations (path confinement) + /// All db_path and backup_path values must resolve within this directory. + /// Defaults to the current working directory. + #[serde(default = "default_data_dir")] + pub data_dir: String, } // Default value functions @@ -98,6 +104,12 @@ fn default_batch_size() -> usize { 1000 } +fn default_data_dir() -> String { + std::env::current_dir() + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_else(|_| ".".to_string()) +} + fn default_host() -> String { "127.0.0.1".to_string() } @@ -144,6 +156,7 @@ impl Default for McpConfig { host: default_host(), port: default_port(), cors: true, + data_dir: default_data_dir(), } } } @@ -219,6 +232,10 @@ impl Config { self.mcp.port = port.parse().context("Invalid RUVECTOR_MCP_PORT")?; } + if let Ok(data_dir) = std::env::var("RUVECTOR_MCP_DATA_DIR") { + self.mcp.data_dir = data_dir; + } + Ok(()) } diff --git a/crates/ruvector-cli/src/mcp/handlers.rs b/crates/ruvector-cli/src/mcp/handlers.rs index 179adbf37..006864fd2 100644 --- a/crates/ruvector-cli/src/mcp/handlers.rs +++ b/crates/ruvector-cli/src/mcp/handlers.rs @@ -11,6 +11,7 @@ use ruvector_core::{ use ruvector_gnn::{compress::TensorCompress, search::differentiable_search}; use serde_json::{json, Value}; use std::collections::HashMap; +use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Instant; use tokio::sync::RwLock; @@ -23,17 +24,24 @@ pub struct McpHandler { gnn_cache: Arc, /// Tensor compressor for GNN operations tensor_compress: Arc, + /// Allowed base directory for all file operations (path confinement) + allowed_data_dir: PathBuf, } impl McpHandler { pub fn new(config: Config) -> Self { let gnn_cache = Arc::new(GnnCache::new(GnnCacheConfig::default())); + let allowed_data_dir = PathBuf::from(&config.mcp.data_dir); + // Canonicalize at startup so all later comparisons are absolute + let allowed_data_dir = std::fs::canonicalize(&allowed_data_dir) + .unwrap_or_else(|_| std::env::current_dir().unwrap_or_else(|_| PathBuf::from("/"))); Self { config, databases: Arc::new(RwLock::new(HashMap::new())), gnn_cache, tensor_compress: Arc::new(TensorCompress::new()), + allowed_data_dir, } } @@ -44,6 +52,61 @@ impl McpHandler { handler } + /// Validate that a user-supplied path resolves within the allowed data directory. + /// + /// Prevents CWE-22 path traversal by: + /// 1. Resolving the path relative to `allowed_data_dir` (not cwd) + /// 2. Canonicalizing to eliminate `..`, symlinks, and other tricks + /// 3. Checking that the canonical path starts with the allowed directory + fn validate_path(&self, user_path: &str) -> Result { + // Reject obviously malicious absolute paths outside data dir + let path = Path::new(user_path); + + // If relative, resolve against allowed_data_dir + let resolved = if path.is_absolute() { + PathBuf::from(user_path) + } else { + self.allowed_data_dir.join(user_path) + }; + + // For existing paths, canonicalize resolves symlinks and .. + // For non-existing paths, canonicalize the parent and append the filename + let canonical = if resolved.exists() { + std::fs::canonicalize(&resolved) + .with_context(|| format!("Failed to resolve path: {}", user_path))? + } else { + // Canonicalize the parent directory (must exist), then append filename + let parent = resolved.parent().unwrap_or(Path::new("/")); + let parent_canonical = if parent.exists() { + std::fs::canonicalize(parent) + .with_context(|| format!("Parent directory does not exist: {}", parent.display()))? + } else { + // Create the parent directory within allowed_data_dir if it doesn't exist + anyhow::bail!( + "Path '{}' references non-existent directory '{}'", + user_path, + parent.display() + ); + }; + let filename = resolved + .file_name() + .ok_or_else(|| anyhow::anyhow!("Invalid path: no filename in '{}'", user_path))?; + parent_canonical.join(filename) + }; + + // Security check: canonical path must be inside allowed_data_dir + if !canonical.starts_with(&self.allowed_data_dir) { + anyhow::bail!( + "Access denied: path '{}' resolves to '{}' which is outside the allowed data directory '{}'", + user_path, + canonical.display(), + self.allowed_data_dir.display() + ); + } + + Ok(canonical) + } + /// Handle MCP request pub async fn handle_request(&self, request: McpRequest) -> McpResponse { match request.method.as_str() { @@ -387,8 +450,11 @@ impl McpHandler { let params: CreateDbParams = serde_json::from_value(args.clone()).context("Invalid parameters")?; + // Validate path to prevent directory traversal (CWE-22) + let validated_path = self.validate_path(¶ms.path)?; + let mut db_options = self.config.to_db_options(); - db_options.storage_path = params.path.clone(); + db_options.storage_path = validated_path.to_string_lossy().to_string(); db_options.dimensions = params.dimensions; if let Some(metric) = params.distance_metric { @@ -402,12 +468,13 @@ impl McpHandler { } let db = VectorDB::new(db_options)?; + let path_str = validated_path.to_string_lossy().to_string(); self.databases .write() .await - .insert(params.path.clone(), Arc::new(db)); + .insert(path_str.clone(), Arc::new(db)); - Ok(format!("Database created at: {}", params.path)) + Ok(format!("Database created at: {}", path_str)) } async fn tool_insert(&self, args: &Value) -> Result { @@ -461,27 +528,39 @@ impl McpHandler { async fn tool_backup(&self, args: &Value) -> Result { let params: BackupParams = serde_json::from_value(args.clone())?; - std::fs::copy(¶ms.db_path, ¶ms.backup_path).context("Failed to backup database")?; + // Validate both paths to prevent directory traversal (CWE-22) + let validated_db_path = self.validate_path(¶ms.db_path)?; + let validated_backup_path = self.validate_path(¶ms.backup_path)?; + + std::fs::copy(&validated_db_path, &validated_backup_path) + .context("Failed to backup database")?; - Ok(format!("Backed up to: {}", params.backup_path)) + Ok(format!( + "Backed up to: {}", + validated_backup_path.display() + )) } async fn get_or_open_db(&self, path: &str) -> Result> { + // Validate path to prevent directory traversal (CWE-22) + let validated_path = self.validate_path(path)?; + let path_str = validated_path.to_string_lossy().to_string(); + let databases = self.databases.read().await; - if let Some(db) = databases.get(path) { + if let Some(db) = databases.get(&path_str) { return Ok(db.clone()); } drop(databases); // Open new database let mut db_options = self.config.to_db_options(); - db_options.storage_path = path.to_string(); + db_options.storage_path = path_str.clone(); let db = Arc::new(VectorDB::new(db_options)?); self.databases .write() .await - .insert(path.to_string(), db.clone()); + .insert(path_str, db.clone()); Ok(db) } @@ -734,3 +813,127 @@ impl McpHandler { .to_string()) } } + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + fn handler_with_data_dir(data_dir: &Path) -> McpHandler { + let mut config = Config::default(); + config.mcp.data_dir = data_dir.to_string_lossy().to_string(); + McpHandler::new(config) + } + + #[test] + fn test_validate_path_allows_relative_within_data_dir() { + let dir = tempdir().unwrap(); + let handler = handler_with_data_dir(dir.path()); + + // Create a file to validate against + std::fs::write(dir.path().join("test.db"), b"test").unwrap(); + + let result = handler.validate_path("test.db"); + assert!(result.is_ok(), "Should allow relative path within data dir"); + assert!(result.unwrap().starts_with(dir.path())); + } + + #[test] + fn test_validate_path_blocks_absolute_outside_data_dir() { + let dir = tempdir().unwrap(); + let handler = handler_with_data_dir(dir.path()); + + let result = handler.validate_path("/etc/passwd"); + assert!(result.is_err(), "Should block /etc/passwd"); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("outside the allowed data directory"), + "Error should mention path confinement: {}", + err + ); + } + + #[test] + fn test_validate_path_blocks_dot_dot_traversal() { + let dir = tempdir().unwrap(); + // Create a subdir so ../.. resolves to something real + let subdir = dir.path().join("sub"); + std::fs::create_dir_all(&subdir).unwrap(); + let handler = handler_with_data_dir(&subdir); + + let result = handler.validate_path("../../../etc/passwd"); + assert!( + result.is_err(), + "Should block ../ traversal: {:?}", + result + ); + } + + #[test] + fn test_validate_path_blocks_dot_dot_in_middle() { + let dir = tempdir().unwrap(); + let handler = handler_with_data_dir(dir.path()); + + // Create the inner directory + std::fs::create_dir_all(dir.path().join("a")).unwrap(); + + let result = handler.validate_path("a/../../etc/passwd"); + assert!( + result.is_err(), + "Should block ../ in the middle of path" + ); + } + + #[test] + fn test_validate_path_allows_subdirectory_within_data_dir() { + let dir = tempdir().unwrap(); + let handler = handler_with_data_dir(dir.path()); + + // Create subdirectory + std::fs::create_dir_all(dir.path().join("backups")).unwrap(); + + let result = handler.validate_path("backups/mydb.bak"); + assert!( + result.is_ok(), + "Should allow path in subdirectory: {:?}", + result + ); + assert!(result.unwrap().starts_with(dir.path())); + } + + #[test] + fn test_validate_path_allows_new_file_in_data_dir() { + let dir = tempdir().unwrap(); + let handler = handler_with_data_dir(dir.path()); + + let result = handler.validate_path("new_database.db"); + assert!( + result.is_ok(), + "Should allow new file in data dir: {:?}", + result + ); + } + + #[test] + fn test_validate_path_blocks_absolute_path_to_etc() { + let dir = tempdir().unwrap(); + let handler = handler_with_data_dir(dir.path()); + + // Test all 3 POCs from the issue + for path in &["/etc/passwd", "/etc/shadow", "/etc/hosts"] { + let result = handler.validate_path(path); + assert!(result.is_err(), "Should block {}", path); + } + } + + #[test] + fn test_validate_path_blocks_home_ssh_keys() { + let dir = tempdir().unwrap(); + let handler = handler_with_data_dir(dir.path()); + + let result = handler.validate_path("~/.ssh/id_rsa"); + // This is a relative path so it won't expand ~, but test the principle + let result2 = handler.validate_path("/root/.ssh/id_rsa"); + assert!(result2.is_err(), "Should block /root/.ssh/id_rsa"); + } +}