Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions crates/ruvector-cli/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ pub struct McpConfig {
/// Enable CORS
#[serde(default = "default_true")]
pub cors: bool,

/// Allowed data directory for MCP file operations (path confinement)
/// All db_path and backup_path values must resolve within this directory.
/// Defaults to the current working directory.
#[serde(default = "default_data_dir")]
pub data_dir: String,
}

// Default value functions
Expand All @@ -98,6 +104,12 @@ fn default_batch_size() -> usize {
1000
}

fn default_data_dir() -> String {
std::env::current_dir()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_else(|_| ".".to_string())
}

fn default_host() -> String {
"127.0.0.1".to_string()
}
Expand Down Expand Up @@ -144,6 +156,7 @@ impl Default for McpConfig {
host: default_host(),
port: default_port(),
cors: true,
data_dir: default_data_dir(),
}
}
}
Expand Down Expand Up @@ -219,6 +232,10 @@ impl Config {
self.mcp.port = port.parse().context("Invalid RUVECTOR_MCP_PORT")?;
}

if let Ok(data_dir) = std::env::var("RUVECTOR_MCP_DATA_DIR") {
self.mcp.data_dir = data_dir;
}

Ok(())
}

Expand Down
219 changes: 211 additions & 8 deletions crates/ruvector-cli/src/mcp/handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use ruvector_core::{
use ruvector_gnn::{compress::TensorCompress, search::differentiable_search};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Instant;
use tokio::sync::RwLock;
Expand All @@ -23,17 +24,24 @@ pub struct McpHandler {
gnn_cache: Arc<GnnCache>,
/// Tensor compressor for GNN operations
tensor_compress: Arc<TensorCompress>,
/// Allowed base directory for all file operations (path confinement)
allowed_data_dir: PathBuf,
}

impl McpHandler {
pub fn new(config: Config) -> Self {
let gnn_cache = Arc::new(GnnCache::new(GnnCacheConfig::default()));
let allowed_data_dir = PathBuf::from(&config.mcp.data_dir);
// Canonicalize at startup so all later comparisons are absolute
let allowed_data_dir = std::fs::canonicalize(&allowed_data_dir)
.unwrap_or_else(|_| std::env::current_dir().unwrap_or_else(|_| PathBuf::from("/")));

Self {
config,
databases: Arc::new(RwLock::new(HashMap::new())),
gnn_cache,
tensor_compress: Arc::new(TensorCompress::new()),
allowed_data_dir,
}
}

Expand All @@ -44,6 +52,61 @@ impl McpHandler {
handler
}

/// Validate that a user-supplied path resolves within the allowed data directory.
///
/// Prevents CWE-22 path traversal by:
/// 1. Resolving the path relative to `allowed_data_dir` (not cwd)
/// 2. Canonicalizing to eliminate `..`, symlinks, and other tricks
/// 3. Checking that the canonical path starts with the allowed directory
fn validate_path(&self, user_path: &str) -> Result<PathBuf> {
// Reject obviously malicious absolute paths outside data dir
let path = Path::new(user_path);

// If relative, resolve against allowed_data_dir
let resolved = if path.is_absolute() {
PathBuf::from(user_path)
} else {
self.allowed_data_dir.join(user_path)
};

// For existing paths, canonicalize resolves symlinks and ..
// For non-existing paths, canonicalize the parent and append the filename
let canonical = if resolved.exists() {
std::fs::canonicalize(&resolved)
.with_context(|| format!("Failed to resolve path: {}", user_path))?
} else {
// Canonicalize the parent directory (must exist), then append filename
let parent = resolved.parent().unwrap_or(Path::new("/"));
let parent_canonical = if parent.exists() {
std::fs::canonicalize(parent)
.with_context(|| format!("Parent directory does not exist: {}", parent.display()))?
} else {
// Create the parent directory within allowed_data_dir if it doesn't exist
anyhow::bail!(
"Path '{}' references non-existent directory '{}'",
user_path,
parent.display()
);
};
let filename = resolved
.file_name()
.ok_or_else(|| anyhow::anyhow!("Invalid path: no filename in '{}'", user_path))?;
parent_canonical.join(filename)
};

// Security check: canonical path must be inside allowed_data_dir
if !canonical.starts_with(&self.allowed_data_dir) {
anyhow::bail!(
"Access denied: path '{}' resolves to '{}' which is outside the allowed data directory '{}'",
user_path,
canonical.display(),
self.allowed_data_dir.display()
);
}

Ok(canonical)
}

/// Handle MCP request
pub async fn handle_request(&self, request: McpRequest) -> McpResponse {
match request.method.as_str() {
Expand Down Expand Up @@ -387,8 +450,11 @@ impl McpHandler {
let params: CreateDbParams =
serde_json::from_value(args.clone()).context("Invalid parameters")?;

// Validate path to prevent directory traversal (CWE-22)
let validated_path = self.validate_path(&params.path)?;

let mut db_options = self.config.to_db_options();
db_options.storage_path = params.path.clone();
db_options.storage_path = validated_path.to_string_lossy().to_string();
db_options.dimensions = params.dimensions;

if let Some(metric) = params.distance_metric {
Expand All @@ -402,12 +468,13 @@ impl McpHandler {
}

let db = VectorDB::new(db_options)?;
let path_str = validated_path.to_string_lossy().to_string();
self.databases
.write()
.await
.insert(params.path.clone(), Arc::new(db));
.insert(path_str.clone(), Arc::new(db));

Ok(format!("Database created at: {}", params.path))
Ok(format!("Database created at: {}", path_str))
}

async fn tool_insert(&self, args: &Value) -> Result<String> {
Expand Down Expand Up @@ -461,27 +528,39 @@ impl McpHandler {
async fn tool_backup(&self, args: &Value) -> Result<String> {
let params: BackupParams = serde_json::from_value(args.clone())?;

std::fs::copy(&params.db_path, &params.backup_path).context("Failed to backup database")?;
// Validate both paths to prevent directory traversal (CWE-22)
let validated_db_path = self.validate_path(&params.db_path)?;
let validated_backup_path = self.validate_path(&params.backup_path)?;

std::fs::copy(&validated_db_path, &validated_backup_path)
.context("Failed to backup database")?;

Ok(format!("Backed up to: {}", params.backup_path))
Ok(format!(
"Backed up to: {}",
validated_backup_path.display()
))
}

async fn get_or_open_db(&self, path: &str) -> Result<Arc<VectorDB>> {
// Validate path to prevent directory traversal (CWE-22)
let validated_path = self.validate_path(path)?;
let path_str = validated_path.to_string_lossy().to_string();

let databases = self.databases.read().await;
if let Some(db) = databases.get(path) {
if let Some(db) = databases.get(&path_str) {
return Ok(db.clone());
}
drop(databases);

// Open new database
let mut db_options = self.config.to_db_options();
db_options.storage_path = path.to_string();
db_options.storage_path = path_str.clone();

let db = Arc::new(VectorDB::new(db_options)?);
self.databases
.write()
.await
.insert(path.to_string(), db.clone());
.insert(path_str, db.clone());

Ok(db)
}
Expand Down Expand Up @@ -734,3 +813,127 @@ impl McpHandler {
.to_string())
}
}

#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;

fn handler_with_data_dir(data_dir: &Path) -> McpHandler {
let mut config = Config::default();
config.mcp.data_dir = data_dir.to_string_lossy().to_string();
McpHandler::new(config)
}

#[test]
fn test_validate_path_allows_relative_within_data_dir() {
let dir = tempdir().unwrap();
let handler = handler_with_data_dir(dir.path());

// Create a file to validate against
std::fs::write(dir.path().join("test.db"), b"test").unwrap();

let result = handler.validate_path("test.db");
assert!(result.is_ok(), "Should allow relative path within data dir");
assert!(result.unwrap().starts_with(dir.path()));
}

#[test]
fn test_validate_path_blocks_absolute_outside_data_dir() {
let dir = tempdir().unwrap();
let handler = handler_with_data_dir(dir.path());

let result = handler.validate_path("/etc/passwd");
assert!(result.is_err(), "Should block /etc/passwd");
let err = result.unwrap_err().to_string();
assert!(
err.contains("outside the allowed data directory"),
"Error should mention path confinement: {}",
err
);
}

#[test]
fn test_validate_path_blocks_dot_dot_traversal() {
let dir = tempdir().unwrap();
// Create a subdir so ../.. resolves to something real
let subdir = dir.path().join("sub");
std::fs::create_dir_all(&subdir).unwrap();
let handler = handler_with_data_dir(&subdir);

let result = handler.validate_path("../../../etc/passwd");
assert!(
result.is_err(),
"Should block ../ traversal: {:?}",
result
);
}

#[test]
fn test_validate_path_blocks_dot_dot_in_middle() {
let dir = tempdir().unwrap();
let handler = handler_with_data_dir(dir.path());

// Create the inner directory
std::fs::create_dir_all(dir.path().join("a")).unwrap();

let result = handler.validate_path("a/../../etc/passwd");
assert!(
result.is_err(),
"Should block ../ in the middle of path"
);
}

#[test]
fn test_validate_path_allows_subdirectory_within_data_dir() {
let dir = tempdir().unwrap();
let handler = handler_with_data_dir(dir.path());

// Create subdirectory
std::fs::create_dir_all(dir.path().join("backups")).unwrap();

let result = handler.validate_path("backups/mydb.bak");
assert!(
result.is_ok(),
"Should allow path in subdirectory: {:?}",
result
);
assert!(result.unwrap().starts_with(dir.path()));
}

#[test]
fn test_validate_path_allows_new_file_in_data_dir() {
let dir = tempdir().unwrap();
let handler = handler_with_data_dir(dir.path());

let result = handler.validate_path("new_database.db");
assert!(
result.is_ok(),
"Should allow new file in data dir: {:?}",
result
);
}

#[test]
fn test_validate_path_blocks_absolute_path_to_etc() {
let dir = tempdir().unwrap();
let handler = handler_with_data_dir(dir.path());

// Test all 3 POCs from the issue
for path in &["/etc/passwd", "/etc/shadow", "/etc/hosts"] {
let result = handler.validate_path(path);
assert!(result.is_err(), "Should block {}", path);
}
}

#[test]
fn test_validate_path_blocks_home_ssh_keys() {
let dir = tempdir().unwrap();
let handler = handler_with_data_dir(dir.path());

let result = handler.validate_path("~/.ssh/id_rsa");
// This is a relative path so it won't expand ~, but test the principle
let result2 = handler.validate_path("/root/.ssh/id_rsa");
assert!(result2.is_err(), "Should block /root/.ssh/id_rsa");
}
}
Loading