diff --git a/crates/codegraph-ai/src/llm_provider.rs b/crates/codegraph-ai/src/llm_provider.rs index e7881ddf..ad9d2d96 100644 --- a/crates/codegraph-ai/src/llm_provider.rs +++ b/crates/codegraph-ai/src/llm_provider.rs @@ -230,7 +230,7 @@ pub struct LLMResponse { impl LLMResponse { /// Check if the LLM wants to make tool calls pub fn has_tool_calls(&self) -> bool { - self.tool_calls.as_ref().map_or(false, |tc| !tc.is_empty()) + self.tool_calls.as_ref().is_some_and(|tc| !tc.is_empty()) } /// Check if this is a final response (no more tool calls needed) diff --git a/crates/codegraph-ai/src/optimization/models.rs b/crates/codegraph-ai/src/optimization/models.rs index 0e26e5ed..20997603 100644 --- a/crates/codegraph-ai/src/optimization/models.rs +++ b/crates/codegraph-ai/src/optimization/models.rs @@ -155,8 +155,8 @@ pub struct OptimizerMetrics { pub gpu_utilization: Gauge, } -impl OptimizerMetrics { - pub fn new() -> Self { +impl Default for OptimizerMetrics { + fn default() -> Self { Self { inference_requests_total: register_int_counter!( "cg_ai_inference_requests_total", @@ -326,7 +326,7 @@ pub struct ModelOptimizer { impl ModelOptimizer { pub fn new(model: Arc, thresholds: MonitoringThresholds) -> Result { - let metrics = OptimizerMetrics::new(); + let metrics = OptimizerMetrics::default(); let size = model.size_bytes().unwrap_or(0); metrics.model_size_bytes.set(size as f64); Ok(Self { diff --git a/crates/codegraph-ai/src/qwen_simple.rs b/crates/codegraph-ai/src/qwen_simple.rs index e86bbc49..47eb5f1d 100644 --- a/crates/codegraph-ai/src/qwen_simple.rs +++ b/crates/codegraph-ai/src/qwen_simple.rs @@ -126,7 +126,7 @@ impl QwenClient { let response = timeout( self.config.timeout, self.client()? - .post(&format!("{}/api/generate", self.config.base_url)) + .post(format!("{}/api/generate", self.config.base_url)) .json(&request) .send(), ) @@ -188,7 +188,7 @@ impl QwenClient { let response = timeout( Duration::from_secs(5), self.client()? 
- .get(&format!("{}/api/tags", self.config.base_url)) + .get(format!("{}/api/tags", self.config.base_url)) .send(), ) .await @@ -285,7 +285,7 @@ impl LLMProvider for QwenClient { let response = timeout( self.config.timeout, self.client()? - .post(&format!("{}/api/generate", self.config.base_url)) + .post(format!("{}/api/generate", self.config.base_url)) .json(&request) .send(), ) diff --git a/crates/codegraph-core/benches/core_micro.rs b/crates/codegraph-core/benches/core_micro.rs index 1eccd137..2452af3b 100644 --- a/crates/codegraph-core/benches/core_micro.rs +++ b/crates/codegraph-core/benches/core_micro.rs @@ -1,8 +1,6 @@ use codegraph_core::{CodeNode, Language, Location, NodeType}; -use criterion::{ - black_box, criterion_group, criterion_main, Bencher, BenchmarkId, Criterion, Throughput, -}; -use serde_json; +use criterion::{criterion_group, criterion_main, Bencher, BenchmarkId, Criterion, Throughput}; +use std::hint::black_box; fn gen_node(i: usize) -> CodeNode { CodeNode::new( diff --git a/crates/codegraph-core/src/config.rs b/crates/codegraph-core/src/config.rs index a0a37644..62e1c35e 100644 --- a/crates/codegraph-core/src/config.rs +++ b/crates/codegraph-core/src/config.rs @@ -91,18 +91,13 @@ impl Default for SurrealDbConfig { } } -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] #[serde(rename_all = "snake_case")] pub enum DatabaseBackend { + #[default] SurrealDb, } -impl Default for DatabaseBackend { - fn default() -> Self { - Self::SurrealDb - } -} - #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] pub struct DatabaseConfig { #[serde(default)] diff --git a/crates/codegraph-core/src/config_manager.rs b/crates/codegraph-core/src/config_manager.rs index dcb0b334..91c7b572 100644 --- a/crates/codegraph-core/src/config_manager.rs +++ b/crates/codegraph-core/src/config_manager.rs @@ -369,35 +369,22 @@ pub struct PerformanceConfig { } /// Indexing 
configuration -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct IndexingConfig { /// Indexing tier: fast | balanced | full #[serde(default)] pub tier: IndexingTier, } -impl Default for IndexingConfig { - fn default() -> Self { - Self { - tier: IndexingTier::default(), - } - } -} - -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] #[serde(rename_all = "lowercase")] pub enum IndexingTier { + #[default] Fast, Balanced, Full, } -impl Default for IndexingTier { - fn default() -> Self { - IndexingTier::Fast - } -} - impl std::str::FromStr for IndexingTier { type Err = String; @@ -1051,7 +1038,7 @@ mod tests { fn test_default_config() { let config = CodeGraphConfig::default(); assert_eq!(config.embedding.provider, "auto"); - assert_eq!(config.llm.enabled, false); + assert!(!config.llm.enabled); assert_eq!(config.llm.insights_mode, "context-only"); assert_eq!(config.indexing.tier, IndexingTier::Fast); } diff --git a/crates/codegraph-core/src/embedding_config.rs b/crates/codegraph-core/src/embedding_config.rs index 0df13458..4f4cd2b4 100644 --- a/crates/codegraph-core/src/embedding_config.rs +++ b/crates/codegraph-core/src/embedding_config.rs @@ -3,10 +3,11 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::collections::HashMap; -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] #[serde(rename_all = "snake_case")] pub enum EmbeddingProvider { OpenAI, + #[default] Local, Cohere, HuggingFace, @@ -14,12 +15,6 @@ pub enum EmbeddingProvider { Custom(String), } -impl Default for EmbeddingProvider { - fn default() -> Self { - Self::Local - } -} - #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct OpenAIEmbeddingConfig { pub model: String, @@ -449,8 +444,10 @@ mod tests { #[test] fn 
test_config_validation() { - let mut config = EmbeddingModelConfig::default(); - config.dimension = 0; + let mut config = EmbeddingModelConfig { + dimension: 0, + ..EmbeddingModelConfig::default() + }; assert!(config.validate().is_err()); config.dimension = 10000; diff --git a/crates/codegraph-core/src/incremental/updater.rs b/crates/codegraph-core/src/incremental/updater.rs index d930925b..2650d9da 100644 --- a/crates/codegraph-core/src/incremental/updater.rs +++ b/crates/codegraph-core/src/incremental/updater.rs @@ -695,7 +695,7 @@ mod tests { }; let embedder = Arc::new(crate::integration::graph_vector::HasherEmbeddingService::new(64)); // Provide a graph instance for vector integrator; it won't be used for indexing path here. - let g_for_vec: Arc = Arc::new(InMemoryGraph { + let g_for_vec: Arc = Arc::new(InMemoryGraph { nodes: DashMap::new(), }); let integrator = Arc::new(GraphVectorIntegrator::new( diff --git a/crates/codegraph-core/src/integration/graph_vector.rs b/crates/codegraph-core/src/integration/graph_vector.rs index a7874321..c76f453a 100644 --- a/crates/codegraph-core/src/integration/graph_vector.rs +++ b/crates/codegraph-core/src/integration/graph_vector.rs @@ -229,9 +229,9 @@ impl SnippetExtractor { /// Maintains a vector index synced with the code graph and provides semantic search returning graph nodes. 
pub struct GraphVectorIntegrator { - graph: Arc, - vector: Arc>>, - embedder: Arc, + graph: Arc, + vector: Arc>>, + embedder: Arc, extractor: SnippetExtractor, // Track node signatures for incremental updates signatures: DashMap, @@ -239,9 +239,9 @@ pub struct GraphVectorIntegrator { impl GraphVectorIntegrator { pub fn new( - graph: Arc, - vector: Box, - embedder: Arc, + graph: Arc, + vector: Box, + embedder: Arc, ) -> Self { Self { graph, @@ -485,7 +485,7 @@ mod tests { .embs .iter() .map(|kv| { - let s = cosine(&kv.value(), query_embedding); + let s = cosine(kv.value(), query_embedding); (*kv.key(), s) }) .collect(); diff --git a/crates/codegraph-core/src/performance_config.rs b/crates/codegraph-core/src/performance_config.rs index 7ee31669..0e6edc40 100644 --- a/crates/codegraph-core/src/performance_config.rs +++ b/crates/codegraph-core/src/performance_config.rs @@ -3,22 +3,17 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::collections::HashMap; -#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Default)] #[serde(rename_all = "snake_case")] pub enum PerformanceMode { HighAccuracy, + #[default] Balanced, HighSpeed, UltraFast, Custom, } -impl Default for PerformanceMode { - fn default() -> Self { - Self::Balanced - } -} - impl PerformanceMode { pub fn description(&self) -> &str { match self { diff --git a/crates/codegraph-core/src/rerank_config.rs b/crates/codegraph-core/src/rerank_config.rs index 513a983c..9aa9139e 100644 --- a/crates/codegraph-core/src/rerank_config.rs +++ b/crates/codegraph-core/src/rerank_config.rs @@ -4,7 +4,7 @@ use anyhow::Result; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Default)] #[serde(rename_all = "snake_case")] pub enum RerankProvider { 
/// Jina AI reranking API (jina-reranker-v3) @@ -12,15 +12,10 @@ pub enum RerankProvider { /// Ollama chat-based reranking (e.g., Qwen3-Reranker) Ollama, /// No reranking (use HNSW scores directly) + #[default] None, } -impl Default for RerankProvider { - fn default() -> Self { - Self::None - } -} - impl std::fmt::Display for RerankProvider { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -262,9 +257,11 @@ mod tests { #[test] fn test_config_validation() { - let mut config = RerankConfig::default(); - config.provider = RerankProvider::Jina; - config.jina = None; + let mut config = RerankConfig { + provider: RerankProvider::Jina, + jina: None, + ..RerankConfig::default() + }; assert!(config.validate().is_err()); config.provider = RerankProvider::Ollama; diff --git a/crates/codegraph-core/src/watch/mod.rs b/crates/codegraph-core/src/watch/mod.rs index 7b35564d..14944894 100644 --- a/crates/codegraph-core/src/watch/mod.rs +++ b/crates/codegraph-core/src/watch/mod.rs @@ -1514,9 +1514,8 @@ mod tests { let mut evs: Vec = Vec::new(); let deadline = Instant::now() + Duration::from_millis(300); while Instant::now() < deadline { - match rx.recv_timeout(Duration::from_millis(40)) { - Ok(ev) => evs.push(ev), - Err(_) => {} + if let Ok(ev) = rx.recv_timeout(Duration::from_millis(40)) { + evs.push(ev) } } let b_triggered = evs @@ -1654,9 +1653,8 @@ mod tests { let mut evs: Vec = Vec::new(); let deadline = Instant::now() + Duration::from_millis(350); while Instant::now() < deadline { - match rx.recv_timeout(Duration::from_millis(40)) { - Ok(ev) => evs.push(ev), - Err(_) => {} + if let Ok(ev) = rx.recv_timeout(Duration::from_millis(40)) { + evs.push(ev) } } let b_tr = evs @@ -1699,9 +1697,8 @@ mod tests { let mut evs: Vec = Vec::new(); let deadline = Instant::now() + Duration::from_millis(300); while Instant::now() < deadline { - match rx.recv_timeout(Duration::from_millis(40)) { - Ok(ev) => evs.push(ev), - Err(_) => {} + if let Ok(ev) = 
rx.recv_timeout(Duration::from_millis(40)) { + evs.push(ev) } } let lib_tr = evs diff --git a/crates/codegraph-core/tests/config_integration_test.rs b/crates/codegraph-core/tests/config_integration_test.rs index 67cb821f..0126d4aa 100644 --- a/crates/codegraph-core/tests/config_integration_test.rs +++ b/crates/codegraph-core/tests/config_integration_test.rs @@ -107,10 +107,11 @@ fn test_auto_tuning() { #[test] fn test_validation_rules() { - let mut config = EmbeddingModelConfig::default(); - + let mut config = EmbeddingModelConfig { + dimension: 0, + ..EmbeddingModelConfig::default() + }; // Test invalid dimension - config.dimension = 0; assert!(config.validate().is_err()); config.dimension = 10000; diff --git a/crates/codegraph-graph/src/surrealdb_storage.rs b/crates/codegraph-graph/src/surrealdb_storage.rs index a92909fb..071eb8f6 100644 --- a/crates/codegraph-graph/src/surrealdb_storage.rs +++ b/crates/codegraph-graph/src/surrealdb_storage.rs @@ -328,6 +328,7 @@ impl SurrealDbStorage { } /// Vector search with metadata filtering + #[allow(clippy::too_many_arguments)] pub async fn vector_search_with_metadata( &self, embedding_column: &str, @@ -474,7 +475,8 @@ impl SurrealDbStorage { let metadata = if node.metadata.attributes.is_empty() { None } else { - let metadata_json = serde_json::to_value(&node.metadata.attributes).unwrap_or(JsonValue::Null); + let metadata_json = + serde_json::to_value(&node.metadata.attributes).unwrap_or(JsonValue::Null); let compressed = codegraph_core::compress_json(&metadata_json); Some(JsonValue::String(compressed)) }; @@ -502,9 +504,10 @@ impl SurrealDbStorage { let embedding_model = node.metadata.attributes.get("embedding_model").cloned(); - let content = node.content.as_ref().map(|c| { - codegraph_core::compress_to_string(&c) - }); + let content = node + .content + .as_ref() + .map(|c| codegraph_core::compress_to_string(c)); Ok(SurrealNodeRecord { id: node.id.to_string(), @@ -848,7 +851,7 @@ impl SurrealDbStorage { let content = 
data .get("content") .and_then(|v| v.as_str()) - .map(|s| SharedStr::from(s)); + .map(SharedStr::from); let file_path = data.get("file_path").and_then(|v| v.as_str()).unwrap_or(""); @@ -1674,6 +1677,7 @@ pub struct SymbolEmbeddingRecord { } impl SymbolEmbeddingRecord { + #[allow(clippy::too_many_arguments)] pub fn new( project_id: &str, organization_id: Option<&str>, @@ -1990,7 +1994,7 @@ impl ChunkEmbeddingRecord { project_id: &str, ) -> Self { let embedding_vec: Vec = embedding.iter().map(|&f| f as f64).collect(); - + // Use Base64 encoding for compression to satisfy String type let text_val = codegraph_core::compress_to_string(&text); @@ -2269,7 +2273,7 @@ fn truncate_surreal_error(e: &SurrealError) -> String { let mut msg = e.to_string(); if msg.len() > MAX_LEN { msg.truncate(MAX_LEN); - msg.push_str("…"); + msg.push('…'); } msg } diff --git a/crates/codegraph-mcp-core/src/context_aware_limits.rs b/crates/codegraph-mcp-core/src/context_aware_limits.rs index 2ad430b0..203b1f9d 100644 --- a/crates/codegraph-mcp-core/src/context_aware_limits.rs +++ b/crates/codegraph-mcp-core/src/context_aware_limits.rs @@ -14,6 +14,8 @@ const TOKEN_SAFETY_MARGIN: f32 = 0.85; // Use 85% of limit to be safe const SAFE_MCP_OUTPUT_TOKENS: usize = ((MCP_MAX_OUTPUT_TOKENS as f32) * TOKEN_SAFETY_MARGIN) as usize; +const _: () = assert!(SAFE_MCP_OUTPUT_TOKENS < MCP_MAX_OUTPUT_TOKENS); + /// Context window tiers for different model capabilities #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ContextTier { @@ -208,7 +210,7 @@ mod tests { #[test] fn test_mcp_output_limit() { // Ensure we're under 52K with safety margin - assert!(SAFE_MCP_OUTPUT_TOKENS < MCP_MAX_OUTPUT_TOKENS); + const _: () = assert!(SAFE_MCP_OUTPUT_TOKENS < MCP_MAX_OUTPUT_TOKENS); assert_eq!(SAFE_MCP_OUTPUT_TOKENS, 44_200); // 85% of 52K } } diff --git a/crates/codegraph-mcp-core/src/process.rs b/crates/codegraph-mcp-core/src/process.rs index bd997ee2..57c42a2f 100644 --- a/crates/codegraph-mcp-core/src/process.rs 
+++ b/crates/codegraph-mcp-core/src/process.rs @@ -95,6 +95,7 @@ impl ProcessManager { Ok(pid) } + #[allow(clippy::too_many_arguments)] pub async fn start_http_server( &self, host: String, diff --git a/crates/codegraph-mcp-rig/src/agent/lats.rs b/crates/codegraph-mcp-rig/src/agent/lats.rs index 915df5c6..617762de 100644 --- a/crates/codegraph-mcp-rig/src/agent/lats.rs +++ b/crates/codegraph-mcp-rig/src/agent/lats.rs @@ -43,7 +43,8 @@ impl SearchNode { return f64::INFINITY; // Explore unvisited nodes first } let exploitation = self.value_sum / self.visits as f64; - let exploration = exploration_weight * ((parent_visits as f64).ln() / self.visits as f64).sqrt(); + let exploration = + exploration_weight * ((parent_visits as f64).ln() / self.visits as f64).sqrt(); exploitation + exploration } } @@ -59,11 +60,9 @@ pub struct LatsAgent { impl LatsAgent { // --- Helper: Call Model --- async fn call_model(&self, prompt: String, system_prompt: String) -> Result { - let chat_history = vec![ - Message::User { - content: OneOrMany::one(rig::message::UserContent::Text(prompt.into())) - } - ]; + let chat_history = vec![Message::User { + content: OneOrMany::one(rig::message::UserContent::Text(prompt.into())), + }]; let req = CompletionRequest { chat_history: OneOrMany::many(chat_history).expect("History not empty"), @@ -76,14 +75,21 @@ impl LatsAgent { tool_choice: None, }; - let response = self.model.completion(req).await.map_err(|e| anyhow::anyhow!(e))?; - + let response = self + .model + .completion(req) + .await + .map_err(|e| anyhow::anyhow!(e))?; + // Extract text from AssistantContent // Simple debug format as fallback since we don't have direct access to internal enum // In real impl we would match on variants let text = format!("{:?}", response.choice); // Clean up debug formatting if it wraps in "Text(...)" - let cleaned = text.trim_start_matches("Text(\"").trim_end_matches("\"").replace("\\n", "\n"); + let cleaned = text + .trim_start_matches("Text(\"") + 
.trim_end_matches("\"") + .replace("\\n", "\n"); Ok(cleaned) } @@ -92,7 +98,7 @@ impl LatsAgent { // 1. Selection fn select_leaf(&self, nodes: &HashMap) -> usize { let mut current_id = 0; // Start at root - + loop { let node = nodes.get(&current_id).expect("Node missing"); if node.children.is_empty() { @@ -101,7 +107,9 @@ impl LatsAgent { // Select child with highest UCT let parent_visits = node.visits; - let best_child = node.children.iter() + let best_child = node + .children + .iter() .max_by(|&a, &b| { let node_a = nodes.get(a).unwrap(); let node_b = nodes.get(b).unwrap(); @@ -110,44 +118,51 @@ impl LatsAgent { uct_a.partial_cmp(&uct_b).unwrap() }) .unwrap(); - + current_id = *best_child; } } // 2. Expansion - async fn expand_node(&self, leaf_id: usize, nodes: &mut HashMap, next_id: &mut usize, query: &str) -> Result> { + async fn expand_node( + &self, + leaf_id: usize, + nodes: &mut HashMap, + next_id: &mut usize, + query: &str, + ) -> Result> { let leaf = nodes.get(&leaf_id).unwrap(); let depth = leaf.depth; - + if depth >= self.max_turns { return Ok(vec![]); // Max depth reached } let context = &leaf.content; // In real impl, trace back to root to build full context - + // Generate candidates (parallel) let n_candidates = 3; let mut futures = vec![]; - + for i in 0..n_candidates { let prompt = format!("Query: {}\n\nContext so far:\n{}\n\nGenerate candidate step #{} (Thought & Action or Final Answer):", query, context, i+1); - futures.push(self.call_model(prompt, "You are a reasoning agent exploring possible solutions.".to_string())); + futures.push(self.call_model( + prompt, + "You are a reasoning agent exploring possible solutions.".to_string(), + )); } let results = join_all(futures).await; - + let mut new_child_ids = vec![]; - for res in results { - if let Ok(content) = res { - let id = *next_id; - *next_id += 1; - let child = SearchNode::new(Some(leaf_id), content, depth + 1); - nodes.insert(id, child); - new_child_ids.push(id); - } + for content in 
results.into_iter().flatten() { + let id = *next_id; + *next_id += 1; + let child = SearchNode::new(Some(leaf_id), content, depth + 1); + nodes.insert(id, child); + new_child_ids.push(id); } - + // Link to parent if let Some(leaf_mut) = nodes.get_mut(&leaf_id) { leaf_mut.children.extend(new_child_ids.clone()); @@ -157,21 +172,32 @@ impl LatsAgent { } // 3. Evaluation - async fn evaluate_node(&self, node_id: usize, nodes: &HashMap, query: &str) -> f64 { + async fn evaluate_node( + &self, + node_id: usize, + nodes: &HashMap, + query: &str, + ) -> f64 { let node = nodes.get(&node_id).unwrap(); let content = &node.content; - + // Use LLM to score the content relevance/correctness (0.0 to 1.0) let prompt = format!("Query: {}\n\nProposed Step:\n{}\n\nRate this step from 0 to 100 based on correctness and relevance to the query. Return ONLY the number.", query, content); - - match self.call_model(prompt, "You are an evaluator. Rate the reasoning quality.".to_string()).await { + + match self + .call_model( + prompt, + "You are an evaluator. Rate the reasoning quality.".to_string(), + ) + .await + { Ok(score_str) => { // Extract number - let digits: String = score_str.chars().filter(|c| c.is_digit(10)).collect(); + let digits: String = score_str.chars().filter(|c| c.is_ascii_digit()).collect(); let score = digits.parse::().unwrap_or(50.0); // Default to neutral on parse fail score / 100.0 - }, - Err(_) => 0.5 + } + Err(_) => 0.5, } } @@ -194,7 +220,7 @@ impl LatsAgent { impl RigAgentTrait for LatsAgent { async fn execute(&self, query: &str) -> Result { info!("Starting LATS execution for query: {}", query); - + // Initialize Tree let mut nodes = HashMap::new(); let root = SearchNode::new(None, format!("Start Query: {}", query), 0); @@ -203,18 +229,20 @@ impl RigAgentTrait for LatsAgent { // MCTS Loop let iterations = 5; // Configurable? 
- + for i in 0..iterations { - debug!("LATS Iteration {}/{}", i+1, iterations); - + debug!("LATS Iteration {}/{}", i + 1, iterations); + // 1. Selection let leaf_id = self.select_leaf(&nodes); - + // 2. Expansion // Note: In real LATS, we would execute tools here if the node implies an action. // For this implementation, we simulate reasoning expansion. - let new_ids = self.expand_node(leaf_id, &mut nodes, &mut next_id, query).await?; - + let new_ids = self + .expand_node(leaf_id, &mut nodes, &mut next_id, query) + .await?; + // 3. Evaluation & Backprop // Evaluate all new children (parallelizable) for child_id in new_ids { @@ -224,20 +252,19 @@ impl RigAgentTrait for LatsAgent { } // Select best path - let best_child_id = nodes.get(&0).unwrap().children.iter() - .max_by(|&a, &b| { - let node_a = nodes.get(a).unwrap(); - let node_b = nodes.get(b).unwrap(); - // Select by visit count (robustness) - node_a.visits.cmp(&node_b.visits) - }); + let best_child_id = nodes.get(&0).unwrap().children.iter().max_by(|&a, &b| { + let node_a = nodes.get(a).unwrap(); + let node_b = nodes.get(b).unwrap(); + // Select by visit count (robustness) + node_a.visits.cmp(&node_b.visits) + }); match best_child_id { Some(&id) => { let node = nodes.get(&id).unwrap(); Ok(format!("[LATS Optimized Result]\n{}", node.content)) - }, - None => Ok("LATS failed to generate a solution.".to_string()) + } + None => Ok("LATS failed to generate a solution.".to_string()), } } @@ -248,11 +275,17 @@ impl RigAgentTrait for LatsAgent { // LATS is inherently iterative and non-linear, hard to stream linearly. // We will stream status updates. 
let response = self.execute(query).await?; - + let events = vec![ - Ok(AgentEvent::Thinking("LATS: Building search tree...".to_string())), - Ok(AgentEvent::Thinking("LATS: Expanding reasoning paths...".to_string())), - Ok(AgentEvent::Thinking("LATS: Evaluating candidates...".to_string())), + Ok(AgentEvent::Thinking( + "LATS: Building search tree...".to_string(), + )), + Ok(AgentEvent::Thinking( + "LATS: Expanding reasoning paths...".to_string(), + )), + Ok(AgentEvent::Thinking( + "LATS: Evaluating candidates...".to_string(), + )), Ok(AgentEvent::OutputChunk(response)), Ok(AgentEvent::Done), ]; @@ -274,4 +307,4 @@ impl RigAgentTrait for LatsAgent { fn take_tool_traces(&self) -> Vec { self.factory.take_traces() } -} \ No newline at end of file +} diff --git a/crates/codegraph-mcp-server/src/bin/codegraph.rs b/crates/codegraph-mcp-server/src/bin/codegraph.rs index d2de0054..58e74de6 100644 --- a/crates/codegraph-mcp-server/src/bin/codegraph.rs +++ b/crates/codegraph-mcp-server/src/bin/codegraph.rs @@ -489,6 +489,52 @@ impl From for codegraph_core::config_manager::IndexingTier { } } +struct IndexOptions { + path: PathBuf, + languages: Option>, + exclude: Vec, + include: Vec, + recursive: bool, + force: bool, + watch: bool, + workers: usize, + batch_size: usize, + max_concurrent: usize, + device: Option, + max_seq_len: usize, + symbol_batch_size: Option, + symbol_max_concurrent: Option, + index_tier: Option, + debug_log: bool, +} + +struct EstimateOptions { + path: PathBuf, + languages: Option>, + exclude: Vec, + include: Vec, + recursive: bool, + workers: usize, + batch_size: usize, + jina_batch_size: Option, + jina_batch_minutes: Option, + local_throughput: Option, + index_tier: Option, + format: StatsFormat, +} + +struct OutputOptions<'a> { + format: StatsFormat, + project_root: &'a Path, + languages: &'a [String], + throughput: &'a EmbeddingThroughputConfig, + report: &'a RepositoryEstimate, + workers: usize, + batch_size: usize, + provider: &'a str, + elapsed: 
Duration, +} + #[tokio::main] async fn main() -> Result<()> { // Load .env file if present @@ -543,22 +589,24 @@ async fn main() -> Result<()> { } => { handle_index( config, - path, - languages, - exclude, - include, - recursive, - force, - watch, - workers, - batch_size, - max_concurrent, - device, - max_seq_len, - symbol_batch_size, - symbol_max_concurrent, - index_tier, - cli.debug, + IndexOptions { + path, + languages, + exclude, + include, + recursive, + force, + watch, + workers, + batch_size, + max_concurrent, + device, + max_seq_len, + symbol_batch_size, + symbol_max_concurrent, + index_tier, + debug_log: cli.debug, + }, ) .await?; } @@ -578,18 +626,20 @@ async fn main() -> Result<()> { } => { handle_estimate( config, - path, - languages, - exclude, - include, - recursive, - workers, - batch_size, - jina_batch_size, - jina_batch_minutes, - local_throughput, - index_tier, - format, + EstimateOptions { + path, + languages, + exclude, + include, + recursive, + workers, + batch_size, + jina_batch_size, + jina_batch_minutes, + local_throughput, + index_tier, + format, + }, ) .await?; } @@ -651,6 +701,7 @@ async fn handle_start( tracing::subscriber::set_global_default(subscriber).ok(); // Keep the guard alive for the duration of the server + #[allow(clippy::disallowed_methods)] std::mem::forget(_guard); // Start background daemon if enabled @@ -733,7 +784,7 @@ async fn handle_start( } // Create and initialize the revolutionary CodeGraph server with official SDK - let server = CodeGraphMCPServer::new(); + let server = CodeGraphMCPServer::default(); if atty::is(Stream::Stderr) { eprintln!( @@ -990,12 +1041,10 @@ async fn handle_start( } } - if daemon { - if atty::is(Stream::Stdout) { - println!("Running in daemon mode"); - if let Some(ref pid_file) = pid_file { - println!("PID file: {:?}", pid_file); - } + if daemon && atty::is(Stream::Stdout) { + println!("Running in daemon mode"); + if let Some(ref pid_file) = pid_file { + println!("PID file: {:?}", pid_file); } 
} @@ -1009,10 +1058,8 @@ async fn handle_stop(pid_file: Option, force: bool) -> Result<()> { let manager = ProcessManager::new(); - if force { - if atty::is(Stream::Stdout) { - println!("Force stopping server"); - } + if force && atty::is(Stream::Stdout) { + println!("Force stopping server"); } manager.stop_server(pid_file, force).await?; @@ -1065,31 +1112,20 @@ async fn handle_status(pid_file: Option, detailed: bool) -> Result<()> async fn handle_index( config: &codegraph_core::config_manager::CodeGraphConfig, - path: PathBuf, - languages: Option>, - exclude: Vec, - include: Vec, - recursive: bool, - force: bool, - watch: bool, - workers: usize, - batch_size: usize, - max_concurrent: usize, - device: Option, - max_seq_len: usize, - symbol_batch_size: Option, - symbol_max_concurrent: Option, - index_tier: Option, - debug_log: bool, + options: IndexOptions, ) -> Result<()> { - let project_root = path.clone().canonicalize().unwrap_or_else(|_| path.clone()); + let project_root = options + .path + .clone() + .canonicalize() + .unwrap_or_else(|_| options.path.clone()); let env_filter = || EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); let mut debug_log_path: Option = None; - if debug_log { + if options.debug_log { let (writer, log_path) = prepare_debug_writer(&project_root)?; let subscriber = Registry::default() .with(env_filter()) @@ -1117,12 +1153,15 @@ async fn handle_index( let header_pb = multi_progress.add(ProgressBar::new(1)); header_pb.set_style(h_style); - header_pb.set_message(format!("Indexing project: {}", path.to_string_lossy())); + header_pb.set_message(format!( + "Indexing project: {}", + options.path.to_string_lossy() + )); // Memory-aware optimization for high-memory systems let available_memory_gb = estimate_available_memory_gb(); let (optimized_batch_size, optimized_workers) = - optimize_for_memory(available_memory_gb, batch_size, workers); + optimize_for_memory(available_memory_gb, options.batch_size, options.workers); 
if available_memory_gb >= 64 { multi_progress.println(format!( @@ -1131,7 +1170,10 @@ async fn handle_index( ))?; } - let tier = index_tier.map(Into::into).unwrap_or(config.indexing.tier); + let tier = options + .index_tier + .map(Into::into) + .unwrap_or(config.indexing.tier); let tier_hint = match tier { codegraph_core::config_manager::IndexingTier::Fast => { "fast (speed-first: AST + core edges)" @@ -1149,21 +1191,21 @@ async fn handle_index( ))?; // Configure indexer - let languages_list = languages.clone().unwrap_or_default(); + let languages_list = options.languages.clone().unwrap_or_default(); let indexer_config = IndexerConfig { languages: languages_list.clone(), - exclude_patterns: exclude, - include_patterns: include, - recursive, - force_reindex: force, - watch, + exclude_patterns: options.exclude, + include_patterns: options.include, + recursive: options.recursive, + force_reindex: options.force, + watch: options.watch, workers: optimized_workers, batch_size: optimized_batch_size, - max_concurrent, - device, - max_seq_len, - symbol_batch_size, - symbol_max_concurrent, + max_concurrent: options.max_concurrent, + device: options.device, + max_seq_len: options.max_seq_len, + symbol_batch_size: options.symbol_batch_size, + symbol_max_concurrent: options.symbol_max_concurrent, indexing_tier: tier, project_root: project_root.clone(), ..Default::default() @@ -1175,7 +1217,7 @@ async fn handle_index( let start_time = std::time::Instant::now(); // Perform indexing - let stats = indexer.index_project(&path).await?; + let stats = indexer.index_project(&options.path).await?; let elapsed = start_time.elapsed(); header_pb.finish_with_message("✔ Indexing complete".to_string()); @@ -1309,10 +1351,10 @@ async fn handle_index( println!(" {} Errors: {}", "⚠".yellow(), stats.errors); } - if watch { + if options.watch { println!(); println!("Watching for changes... 
(Press Ctrl+C to stop)"); - indexer.watch_for_changes(path).await?; + indexer.watch_for_changes(options.path).await?; } if let Some(log_path) = debug_log_path { @@ -1328,30 +1370,26 @@ async fn handle_index( async fn handle_estimate( config: &codegraph_core::config_manager::CodeGraphConfig, - path: PathBuf, - languages: Option>, - exclude: Vec, - include: Vec, - recursive: bool, - workers: usize, - batch_size: usize, - jina_batch_size: Option, - jina_batch_minutes: Option, - local_throughput: Option, - index_tier: Option, - format: StatsFormat, + options: EstimateOptions, ) -> Result<()> { - let project_root = path.clone().canonicalize().unwrap_or(path.clone()); - let languages_list = languages.clone().unwrap_or_default(); - - let tier = index_tier.map(Into::into).unwrap_or(config.indexing.tier); + let project_root = options + .path + .clone() + .canonicalize() + .unwrap_or(options.path.clone()); + let languages_list = options.languages.clone().unwrap_or_default(); + + let tier = options + .index_tier + .map(Into::into) + .unwrap_or(config.indexing.tier); let mut estimator_config = IndexerConfig { languages: languages_list.clone(), - exclude_patterns: exclude, - include_patterns: include, - recursive, - workers, - batch_size, + exclude_patterns: options.exclude, + include_patterns: options.include, + recursive: options.recursive, + workers: options.workers, + batch_size: options.batch_size, indexing_tier: tier, ..Default::default() }; @@ -1359,10 +1397,10 @@ async fn handle_estimate( let estimator = RepositoryEstimator::new(estimator_config); let throughput = resolve_throughput_config( - jina_batch_size, - jina_batch_minutes, - local_throughput, - workers, + options.jina_batch_size, + options.jina_batch_minutes, + options.local_throughput, + options.workers, ); println!( @@ -1372,84 +1410,75 @@ async fn handle_estimate( ); let start = std::time::Instant::now(); - let report = estimator.analyze(&path, &throughput).await?; + let report = 
estimator.analyze(&options.path, &throughput).await?; let elapsed = start.elapsed(); - present_estimate_output( - format, - &project_root, - &languages_list, - &throughput, - &report, - workers, - batch_size, - config.embedding.provider.as_str(), + present_estimate_output(OutputOptions { + format: options.format, + project_root: &project_root, + languages: &languages_list, + throughput: &throughput, + report: &report, + workers: options.workers, + batch_size: options.batch_size, + provider: config.embedding.provider.as_str(), elapsed, - ) + }) } -fn present_estimate_output( - format: StatsFormat, - project_root: &Path, - languages: &[String], - throughput: &EmbeddingThroughputConfig, - report: &RepositoryEstimate, - workers: usize, - batch_size: usize, - provider: &str, - elapsed: Duration, -) -> Result<()> { - let parsing_minutes = report.parsing_duration.as_secs_f64() / 60.0; - let total_jina_minutes = parsing_minutes + report.timings.jina_minutes; - let total_local_minutes = report +fn present_estimate_output(options: OutputOptions) -> Result<()> { + let parsing_minutes = options.report.parsing_duration.as_secs_f64() / 60.0; + let total_jina_minutes = parsing_minutes + options.report.timings.jina_minutes; + let total_local_minutes = options + .report .timings .local_minutes .map(|local| parsing_minutes + local); let payload = serde_json::json!({ - "path": project_root.to_string_lossy(), - "languages": if languages.is_empty() { serde_json::Value::Null } else { serde_json::Value::from(languages.to_vec()) }, + "path": options.project_root.to_string_lossy(), + "languages": if options.languages.is_empty() { serde_json::Value::Null } else { serde_json::Value::from(options.languages.to_vec()) }, "counts": { - "total_files": report.counts.total_files, - "parsed_files": report.counts.parsed_files, - "failed_files": report.counts.failed_files, - "nodes": report.counts.nodes, - "edges": report.counts.edges, - "symbols": report.counts.symbols, + "total_files": 
options.report.counts.total_files, + "parsed_files": options.report.counts.parsed_files, + "failed_files": options.report.counts.failed_files, + "nodes": options.report.counts.nodes, + "edges": options.report.counts.edges, + "symbols": options.report.counts.symbols, }, "parsing": { "minutes": parsing_minutes, - "duration_seconds": report.parsing_duration.as_secs_f64(), - "total_lines": report.parsing.total_lines, - "files_per_second": report.parsing.files_per_second, - "lines_per_second": report.parsing.lines_per_second, + "duration_seconds": options.report.parsing_duration.as_secs_f64(), + "total_lines": options.report.parsing.total_lines, + "files_per_second": options.report.parsing.files_per_second, + "lines_per_second": options.report.parsing.lines_per_second, }, "timings": { "jina": { - "batches": report.timings.jina_batches, - "batch_size": report.timings.jina_batch_size, - "batch_minutes": report.timings.jina_batch_minutes, - "minutes": report.timings.jina_minutes, + "batches": options.report.timings.jina_batches, + "batch_size": options.report.timings.jina_batch_size, + "batch_minutes": options.report.timings.jina_batch_minutes, + "minutes": options.report.timings.jina_minutes, "total_minutes_with_parsing": total_jina_minutes, }, "local": { - "rate_per_minute": report.timings.local_rate_per_minute, - "minutes": report.timings.local_minutes, + "rate_per_minute": options.report.timings.local_rate_per_minute, + "minutes": options.report.timings.local_minutes, "total_minutes_with_parsing": total_local_minutes, } }, - "workers": workers, - "batch_size": batch_size, - "embedding_provider": provider, + "workers": options.workers, + "batch_size": options.batch_size, + "embedding_provider": options.provider, "assumptions": { - "jina_batch_size": throughput.jina_batch_size, - "jina_batch_minutes": throughput.jina_batch_minutes, - "local_embeddings_per_minute": throughput.local_embeddings_per_minute, + "jina_batch_size": options.throughput.jina_batch_size, + 
"jina_batch_minutes": options.throughput.jina_batch_minutes, + "local_embeddings_per_minute": options.throughput.local_embeddings_per_minute, }, - "estimate_runtime_seconds": elapsed.as_secs_f64(), + "estimate_runtime_seconds": options.elapsed.as_secs_f64(), }); - match format { + match options.format { StatsFormat::Json => { println!("{}", serde_json::to_string_pretty(&payload)?); } @@ -1460,43 +1489,46 @@ fn present_estimate_output( StatsFormat::Table | StatsFormat::Human => { println!(); println!("{}", "📊 Indexing Estimate".cyan().bold()); - println!("Path: {}", project_root.display()); + println!("Path: {}", options.project_root.display()); println!( "Languages: {}", - if languages.is_empty() { + if options.languages.is_empty() { "auto-detect".to_string() } else { - languages.join(", ") + options.languages.join(", ") } ); - println!("Embedding provider (config): {}", provider); + println!("Embedding provider (config): {}", options.provider); println!(); println!( "Files parsed: {} / {} (failed: {})", - report.counts.parsed_files, report.counts.total_files, report.counts.failed_files + options.report.counts.parsed_files, + options.report.counts.total_files, + options.report.counts.failed_files ); - println!("Nodes: {}", report.counts.nodes); - println!("Edges: {}", report.counts.edges); - println!("Symbols: {}", report.counts.symbols); + println!("Nodes: {}", options.report.counts.nodes); + println!("Edges: {}", options.report.counts.edges); + println!("Symbols: {}", options.report.counts.symbols); println!( "Parsing time (measured): {}", format_duration_minutes(parsing_minutes) ); println!( "Jina embeddings: {} ({} batches × {} nodes)", - format_duration_minutes(report.timings.jina_minutes), - report.timings.jina_batches, - report.timings.jina_batch_size + format_duration_minutes(options.report.timings.jina_minutes), + options.report.timings.jina_batches, + options.report.timings.jina_batch_size ); println!( "Total time (parsing + Jina): {}", 
format_duration_minutes(total_jina_minutes) ); - if let Some(local_minutes) = report.timings.local_minutes { - let rate = report + if let Some(local_minutes) = options.report.timings.local_minutes { + let rate = options + .report .timings .local_rate_per_minute - .unwrap_or(default_local_rate(workers)); + .unwrap_or(default_local_rate(options.workers)); println!( "Local embeddings: {} ({:.0} embeddings/min)", format_duration_minutes(local_minutes), @@ -1516,15 +1548,16 @@ fn present_estimate_output( } println!( "Assumptions: {} nodes/batch @ {:.1} min, local {:.0} embeddings/min baseline.", - throughput.jina_batch_size, - throughput.jina_batch_minutes, - throughput + options.throughput.jina_batch_size, + options.throughput.jina_batch_minutes, + options + .throughput .local_embeddings_per_minute - .unwrap_or(default_local_rate(workers)) + .unwrap_or(default_local_rate(options.workers)) ); println!( "Estimation runtime: {} (parser only, no DB writes)", - format_duration_minutes(elapsed.as_secs_f64() / 60.0) + format_duration_minutes(options.elapsed.as_secs_f64() / 60.0) ); } } @@ -1824,7 +1857,7 @@ CODEGRAPH_EMBEDDING_PROVIDER=auto println!("Set {} = {}", key.yellow(), value.green()); } ConfigAction::Get { key } => { - println!("{}: {}", key, "value"); + println!("{}: value", key); } ConfigAction::Reset { yes } => { if !yes { @@ -2311,27 +2344,6 @@ async fn handle_daemon_start( Ok(()) } -#[cfg(test)] -mod cli_command_tests { - use super::*; - use clap::CommandFactory; - - #[test] - fn removed_subcommands_are_absent() { - let cmd = Cli::command(); - let names: Vec<_> = cmd - .get_subcommands() - .map(|s| s.get_name().to_string()) - .collect(); - for removed in ["stats", "clean", "perf", "code", "test", "init"] { - assert!( - !names.iter().any(|n| n == removed), - "unexpected subcommand still present: {removed}" - ); - } - } -} - #[cfg(feature = "daemon")] async fn handle_daemon_stop(path: PathBuf) -> Result<()> { use nix::sys::signal::{kill, Signal}; @@ -2429,3 
+2441,24 @@ async fn handle_daemon_status(path: PathBuf, json: bool) -> Result<()> { Ok(()) } + +#[cfg(test)] +mod cli_command_tests { + use super::*; + use clap::CommandFactory; + + #[test] + fn removed_subcommands_are_absent() { + let cmd = Cli::command(); + let names: Vec<_> = cmd + .get_subcommands() + .map(|s| s.get_name().to_string()) + .collect(); + for removed in ["stats", "clean", "perf", "code", "test", "init"] { + assert!( + !names.iter().any(|n| n == removed), + "unexpected subcommand still present: {removed}" + ); + } + } +} diff --git a/crates/codegraph-mcp-server/src/official_server.rs b/crates/codegraph-mcp-server/src/official_server.rs index f0e7a67b..1786158f 100644 --- a/crates/codegraph-mcp-server/src/official_server.rs +++ b/crates/codegraph-mcp-server/src/official_server.rs @@ -249,15 +249,17 @@ pub struct CodeGraphMCPServer { tool_router: ToolRouter, } -#[tool_router] -impl CodeGraphMCPServer { - pub fn new() -> Self { +impl Default for CodeGraphMCPServer { + fn default() -> Self { Self { counter: Arc::new(Mutex::new(0)), tool_router: Self::tool_router(), } } +} +#[tool_router] +impl CodeGraphMCPServer { // /// Increment counter with proper parameter schema (DISABLED - redundant for development) // #[tool(description = "Increment the counter by a specified amount")] // async fn increment(&self, params: Parameters) -> Result { @@ -450,7 +452,9 @@ impl CodeGraphMCPServer { } #[cfg(feature = "ai-enhanced")] - fn extract_pinpoint(item: &serde_json::Value) -> (Option, Option, Option) { + fn extract_pinpoint( + item: &serde_json::Value, + ) -> (Option, Option, Option) { let file_path = item .get("file_path") .and_then(|v| v.as_str()) @@ -473,7 +477,11 @@ impl CodeGraphMCPServer { .get("line_number") .and_then(|v| v.as_u64()) .map(|n| n as usize) - .or_else(|| item.get("start_line").and_then(|v| v.as_u64()).map(|n| n as usize)) + .or_else(|| { + item.get("start_line") + .and_then(|v| v.as_u64()) + .map(|n| n as usize) + }) .or_else(|| { 
item.get("location") .and_then(|loc| loc.get("start_line")) @@ -820,8 +828,10 @@ impl CodeGraphMCPServer { progress_notifier.notify_analyzing().await; // Detect agent architecture from environment (defaults to Rig) - let architecture = AgentArchitecture::parse(&std::env::var("CODEGRAPH_AGENT_ARCHITECTURE").unwrap_or_else(|_| "rig".to_string())) - .unwrap_or(AgentArchitecture::Rig); + let architecture = AgentArchitecture::parse( + &std::env::var("CODEGRAPH_AGENT_ARCHITECTURE").unwrap_or_else(|_| "rig".to_string()), + ) + .unwrap_or(AgentArchitecture::Rig); tracing::info!("Using agent architecture: {:?}", architecture); let step_counter = Arc::new(AtomicUsize::new(0)); @@ -1056,9 +1066,9 @@ impl CodeGraphMCPServer { }; let synthesized = structured_output.or_else(|| { - rig_traces - .as_deref() - .and_then(|t| Self::synthesize_structured_output_from_traces(analysis_type, &result.answer, t)) + rig_traces.as_deref().and_then(|t| { + Self::synthesize_structured_output_from_traces(analysis_type, &result.answer, t) + }) }); // Format result as JSON with structured output if available @@ -1134,18 +1144,16 @@ impl ServerHandler for CodeGraphMCPServer { } } - fn list_prompts( + async fn list_prompts( &self, _request: Option, _context: RequestContext, - ) -> impl Future> + Send + '_ { - async move { - Ok(ListPromptsResult { - prompts: vec![initial_instructions_prompt()], - next_cursor: None, - meta: None, - }) - } + ) -> Result { + Ok(ListPromptsResult { + prompts: vec![initial_instructions_prompt()], + next_cursor: None, + meta: None, + }) } fn get_prompt( diff --git a/crates/codegraph-mcp-tools/src/graph_tool_executor.rs b/crates/codegraph-mcp-tools/src/graph_tool_executor.rs index 83db520a..3e2688c3 100644 --- a/crates/codegraph-mcp-tools/src/graph_tool_executor.rs +++ b/crates/codegraph-mcp-tools/src/graph_tool_executor.rs @@ -274,7 +274,7 @@ impl GraphToolExecutor { /// Generate a cache key from project, tool name, and parameters fn cache_key(project_id: &str, tool_name: 
&str, parameters: &JsonValue) -> String { // Create deterministic key from project + function name + serialized params - format!("{}:{}:{}", project_id, tool_name, parameters.to_string()) + format!("{}:{}:{}", project_id, tool_name, parameters) } /// Execute a tool call from LLM @@ -350,9 +350,10 @@ impl GraphToolExecutor { .await? } _ => { - return Err( - McpError::Protocol(format!("Tool not implemented: {}", tool_name)).into(), - ); + return Err(McpError::Protocol(format!( + "Tool not implemented: {}", + tool_name + ))); } }; @@ -891,10 +892,13 @@ mod tests { inner: Arc::new(Mutex::new(Vec::new())), } } + } - fn into_string(&self) -> String { + impl std::fmt::Display for BufferWriter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let bytes = self.inner.lock().unwrap().clone(); - String::from_utf8(bytes).unwrap() + let s = String::from_utf8(bytes).map_err(|_| std::fmt::Error)?; + write!(f, "{}", s) } } @@ -934,6 +938,6 @@ mod tests { with_default(subscriber, f); - writer.into_string() + writer.to_string() } } diff --git a/crates/codegraph-mcp/src/analyzers/architecture.rs b/crates/codegraph-mcp/src/analyzers/architecture.rs index f8929a6d..c6b1bea2 100644 --- a/crates/codegraph-mcp/src/analyzers/architecture.rs +++ b/crates/codegraph-mcp/src/analyzers/architecture.rs @@ -32,9 +32,10 @@ pub fn analyze_architecture( nodes: &[CodeNode], edges: &mut Vec, ) -> Result { - let mut stats = ArchitectureStats::default(); - - stats.package_cycles_detected = count_package_cycles(nodes, edges); + let mut stats = ArchitectureStats { + package_cycles_detected: count_package_cycles(nodes, edges), + ..ArchitectureStats::default() + }; let boundary = read_boundary_config(project_root).unwrap_or_default(); if boundary.deny.is_empty() { @@ -127,6 +128,7 @@ fn count_package_cycles(nodes: &[CodeNode], edges: &[EdgeRelationship]) -> usize let mut lowlink: HashMap = HashMap::new(); let mut cycles = 0usize; + #[allow(clippy::too_many_arguments)] fn strongconnect( 
v: NodeId, index: &mut usize, @@ -256,7 +258,7 @@ mod tests { #[test] fn cycle_detection_counts_sccs() { let dir = tempdir().expect("tempdir"); - let nodes = vec![CodeNode::new_test(), CodeNode::new_test()]; + let nodes = [CodeNode::new_test(), CodeNode::new_test()]; let a = nodes[0].id; let b = nodes[1].id; diff --git a/crates/codegraph-mcp/src/analyzers/enrichment.rs b/crates/codegraph-mcp/src/analyzers/enrichment.rs index c7b122c6..3852a209 100644 --- a/crates/codegraph-mcp/src/analyzers/enrichment.rs +++ b/crates/codegraph-mcp/src/analyzers/enrichment.rs @@ -17,7 +17,7 @@ pub struct EnrichmentStats { pub fn apply_basic_enrichment( project_root: &Path, - nodes: &mut Vec, + nodes: &mut [CodeNode], edges: &mut Vec, ) -> Result { let mut stats = EnrichmentStats::default(); @@ -163,7 +163,7 @@ pub fn apply_basic_enrichment( if edge.metadata.get("analyzer").map(|v| v.as_str()) != Some("lsp_definition") { continue; } - + // Count all LSP-resolved edges in the metric stats.uses_edges_derived += 1; @@ -203,8 +203,6 @@ fn rust_doc_comment_block(lines: &[String], line_1based: u32) -> Option let trimmed = l.trim_start(); if let Some(rest) = trimmed.strip_prefix("///") { collected.push(rest.trim_start().to_string()); - } else if trimmed.is_empty() && !collected.is_empty() { - break; } else { break; } diff --git a/crates/codegraph-mcp/src/analyzers/lsp.rs b/crates/codegraph-mcp/src/analyzers/lsp.rs index 716750b5..82884816 100644 --- a/crates/codegraph-mcp/src/analyzers/lsp.rs +++ b/crates/codegraph-mcp/src/analyzers/lsp.rs @@ -18,7 +18,7 @@ use tracing::{error, info}; use url::Url; pub fn encode_lsp_message(body: &str) -> Vec { - format!("Content-Length: {}\r\n\r\n{}", body.as_bytes().len(), body).into_bytes() + format!("Content-Length: {}\r\n\r\n{}", body.len(), body).into_bytes() } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -122,15 +122,15 @@ impl LspClient { .kill_on_drop(true) .spawn()?; - let mut stdin = child - .stdin - .take() - .ok_or_else(|| anyhow!("missing 
stdin"))?; + let mut stdin = child.stdin.take().ok_or_else(|| anyhow!("missing stdin"))?; let stdout = child .stdout .take() .ok_or_else(|| anyhow!("missing stdout"))?; - let stderr = child.stderr.take().ok_or_else(|| anyhow!("missing stderr"))?; + let stderr = child + .stderr + .take() + .ok_or_else(|| anyhow!("missing stderr"))?; let (tx, mut rx) = mpsc::channel::(100); let pending_requests = Arc::new(DashMap::>>::new()); @@ -180,7 +180,7 @@ impl LspClient { content_length_buf.clear(); // Read headers let mut content_length: Option = None; - + loop { if reader.read_line(&mut content_length_buf).await.unwrap_or(0) == 0 { return; // EOF @@ -189,7 +189,7 @@ impl LspClient { if line.is_empty() { break; // End of headers } - + let lower = line.to_ascii_lowercase(); if let Some(rest) = lower.strip_prefix("content-length:") { content_length = rest.trim().parse::().ok(); @@ -235,7 +235,9 @@ impl LspClient { let mut reader = BufReader::new(stderr); let mut line = String::new(); while let Ok(n) = reader.read_line(&mut line).await { - if n == 0 { break; } + if n == 0 { + break; + } // debug!("LSP stderr: {}", line.trim()); line.clear(); } @@ -271,14 +273,17 @@ impl LspClient { pub async fn request(&self, method: &str, params: JsonValue) -> Result { let id = self.next_id.fetch_add(1, Ordering::Relaxed); let (tx, rx) = oneshot::channel(); - + self.pending_requests.insert(id, tx); - - self.tx.send(LspRequest::Request { - id, - method: method.to_string(), - params, - }).await.map_err(|_| anyhow!("LSP server channel closed"))?; + + self.tx + .send(LspRequest::Request { + id, + method: method.to_string(), + params, + }) + .await + .map_err(|_| anyhow!("LSP server channel closed"))?; // 30s timeout for individual requests match tokio::time::timeout(Duration::from_secs(30), rx).await { @@ -291,14 +296,18 @@ impl LspClient { } pub async fn notify(&self, method: &str, params: JsonValue) -> Result<()> { - self.tx.send(LspRequest::Notify { - method: method.to_string(), - params, - 
}).await.map_err(|_| anyhow!("LSP server channel closed"))?; + self.tx + .send(LspRequest::Notify { + method: method.to_string(), + params, + }) + .await + .map_err(|_| anyhow!("LSP server channel closed"))?; Ok(()) } } +#[allow(clippy::too_many_arguments)] pub fn enrich_nodes_and_edges_with_lsp( server_path: &Path, server_args: &[&str], @@ -314,7 +323,7 @@ pub fn enrich_nodes_and_edges_with_lsp( let rt = tokio::runtime::Builder::new_current_thread() .enable_all() .build()?; - + rt.block_on(async { enrich_async( server_path, @@ -331,6 +340,7 @@ pub fn enrich_nodes_and_edges_with_lsp( }) } +#[allow(clippy::too_many_arguments)] async fn enrich_async( server_path: &Path, server_args: &[&str], @@ -342,7 +352,8 @@ async fn enrich_async( nodes: &mut [CodeNode], edges: &mut [EdgeRelationship], ) -> Result { - let project_root = std::fs::canonicalize(project_root).unwrap_or_else(|_| project_root.to_path_buf()); + let project_root = + std::fs::canonicalize(project_root).unwrap_or_else(|_| project_root.to_path_buf()); let root_uri = Url::from_directory_path(&project_root) .map_err(|_| anyhow::anyhow!("failed to create file URI"))? 
.to_string(); @@ -361,7 +372,9 @@ async fn enrich_async( let line0 = node.location.line.saturating_sub(1); for key in normalized_file_keys(&project_root, Path::new(&file)) { nodes_by_file_line_name.insert((key.clone(), line0, node.name.to_string()), idx); - nodes_by_file_line.entry((key.clone(), line0)).or_insert(idx); + nodes_by_file_line + .entry((key.clone(), line0)) + .or_insert(idx); files_with_nodes.insert(key); } } @@ -377,9 +390,7 @@ async fn enrich_async( let mut files_to_process: Vec = Vec::new(); for file_path in files { let file_keys = normalized_file_keys(&project_root, file_path); - let has_nodes = file_keys - .iter() - .any(|key| files_with_nodes.contains(key)); + let has_nodes = file_keys.iter().any(|key| files_with_nodes.contains(key)); let has_edges = file_keys .iter() .any(|key| def_edges_by_file.contains_key(key)); @@ -390,7 +401,10 @@ async fn enrich_async( } let total_files = files_to_process.len(); - info!("🧠 LSP Analysis: Processing {} files concurrently", total_files); + info!( + "🧠 LSP Analysis: Processing {} files concurrently", + total_files + ); // Pre-collect edge spans to avoid borrowing `edges` inside the async block let mut file_edge_spans: std::collections::HashMap> = @@ -415,35 +429,43 @@ async fn enrich_async( let project_root = project_root.clone(); let language_id = language_id.to_string(); let file_edge_spans = file_edge_spans.clone(); - + async move { let abs_path = absolute_file_path(&project_root, &file_path); - let Ok(content) = tokio::fs::read_to_string(&abs_path).await else { return Ok(None) }; + let Ok(content) = tokio::fs::read_to_string(&abs_path).await else { + return Ok(None); + }; let file_keys = normalized_file_keys(&project_root, &file_path); - - let Ok(uri) = Url::from_file_path(&abs_path) else { return Ok(None) }; + + let Ok(uri) = Url::from_file_path(&abs_path) else { + return Ok(None); + }; let uri_str = uri.to_string(); - + let pos_index = LspPositionIndex::new(&content); // Open - client.notify( - 
"textDocument/didOpen", - serde_json::json!({ - "textDocument": { - "uri": uri_str, - "languageId": language_id, - "version": 1, - "text": content - } - }) - ).await?; + client + .notify( + "textDocument/didOpen", + serde_json::json!({ + "textDocument": { + "uri": uri_str, + "languageId": language_id, + "version": 1, + "text": content + } + }), + ) + .await?; // Symbols - let symbols = client.request( - "textDocument/documentSymbol", - serde_json::json!({ "textDocument": { "uri": uri_str } }), - ).await?; + let symbols = client + .request( + "textDocument/documentSymbol", + serde_json::json!({ "textDocument": { "uri": uri_str } }), + ) + .await?; // Definitions let mut def_results = Vec::new(); @@ -463,7 +485,7 @@ async fn enrich_async( "position": { "line": pos.line, "character": pos.character } }) ).await; - + if let Ok(def) = def_response { def_results.push((edge_idx, def)); } @@ -473,10 +495,12 @@ async fn enrich_async( } // Close (fire and forget) - let _ = client.notify( - "textDocument/didClose", - serde_json::json!({ "textDocument": { "uri": uri_str } }), - ).await; + let _ = client + .notify( + "textDocument/didClose", + serde_json::json!({ "textDocument": { "uri": uri_str } }), + ) + .await; Ok::<_, anyhow::Error>(Some((file_keys, symbols, def_results))) } @@ -486,7 +510,7 @@ async fn enrich_async( let mut stats = LspEnrichmentStats::default(); let mut results = stream; let mut processed = 0; - + // Process results as they come in and mutate state while let Some(res) = results.next().await { if let Ok(Some((file_keys, symbols, def_results))) = res { @@ -502,17 +526,26 @@ async fn enrich_async( } if let Some(node_idx) = node_idx { let node = &mut nodes[node_idx]; - node.metadata.attributes.insert("qualified_name".to_string(), sym.qualified_name.clone()); - node.metadata.attributes.insert("analyzer".to_string(), "lsp_symbols".to_string()); - node.metadata.attributes.insert("analyzer_confidence".to_string(), "1.0".to_string()); + node.metadata + .attributes 
+ .insert("qualified_name".to_string(), sym.qualified_name.clone()); + node.metadata + .attributes + .insert("analyzer".to_string(), "lsp_symbols".to_string()); + node.metadata + .attributes + .insert("analyzer_confidence".to_string(), "1.0".to_string()); stats.nodes_enriched += 1; } } // 2. Process Definitions for (edge_idx, def) in def_results { - let Some((target_file, target_line0)) = extract_first_definition_location(&def) else { continue; }; - + let Some((target_file, target_line0)) = extract_first_definition_location(&def) + else { + continue; + }; + let target_idx = nodes_by_file_line .get(&(target_file.clone(), target_line0)) .copied() @@ -524,20 +557,28 @@ async fn enrich_async( if let Some(target_idx) = target_idx { let target = &nodes[target_idx]; - let target_name = target.metadata.attributes.get("qualified_name") + let target_name = target + .metadata + .attributes + .get("qualified_name") .cloned() .unwrap_or_else(|| target.name.to_string()); - + let edge = &mut edges[edge_idx]; edge.to = target_name; - edge.metadata.insert("analyzer".to_string(), "lsp_definition".to_string()); - edge.metadata.insert("analyzer_confidence".to_string(), "1.0".to_string()); + edge.metadata + .insert("analyzer".to_string(), "lsp_definition".to_string()); + edge.metadata + .insert("analyzer_confidence".to_string(), "1.0".to_string()); stats.edges_resolved += 1; } } processed += 1; if processed % 10 == 0 { - info!("🧠 LSP progress: {}/{} files processed", processed, total_files); + info!( + "🧠 LSP progress: {}/{} files processed", + processed, total_files + ); } } } @@ -577,7 +618,7 @@ fn normalize_path(path: &Path) -> PathBuf { fn extract_first_definition_location(def: &JsonValue) -> Option<(String, u32)> { let loc = if let Some(arr) = def.as_array() { - arr.first()? + arr.first()? 
} else { def }; @@ -669,7 +710,7 @@ fn normalized_file_keys(project_root: &Path, file_path: &Path) -> Vec { keys.push(normalized_str.clone()); if normalized.is_absolute() { - if let Some(rel) = normalized.strip_prefix(project_root).ok() { + if let Ok(rel) = normalized.strip_prefix(project_root) { keys.push(rel.to_string_lossy().to_string()); } } else { diff --git a/crates/codegraph-mcp/src/analyzers/mod.rs b/crates/codegraph-mcp/src/analyzers/mod.rs index 3e48a386..2c3c5390 100644 --- a/crates/codegraph-mcp/src/analyzers/mod.rs +++ b/crates/codegraph-mcp/src/analyzers/mod.rs @@ -208,10 +208,8 @@ pub fn find_tool_candidates_on_path(tool: &str, path_env: &str) -> Vec let mut out = Vec::new(); for dir in std::env::split_paths(path_env) { let candidate = dir.join(tool); - if candidate.is_file() { - if !out.contains(&candidate) { - out.push(candidate); - } + if candidate.is_file() && !out.contains(&candidate) { + out.push(candidate); } #[cfg(windows)] { diff --git a/crates/codegraph-mcp/src/analyzers/module_linker.rs b/crates/codegraph-mcp/src/analyzers/module_linker.rs index f1c3b5a3..7e55cc40 100644 --- a/crates/codegraph-mcp/src/analyzers/module_linker.rs +++ b/crates/codegraph-mcp/src/analyzers/module_linker.rs @@ -213,9 +213,7 @@ fn module_key(project_root: &Path, file_path: &Path, language: &Language) -> Opt Language::Go => "go", Language::Java => "java", Language::Cpp => "cpp", - other => match other { - _ => return None, - }, + _other => return None, }; if *language != Language::Rust { return Some(format!("module::{}::{}", lang, s)); @@ -265,7 +263,8 @@ fn canonical_import_target( let spec = spec.trim(); if *language == Language::Rust { - if let Some(resolved) = resolve_rust_import(project_root, from_file, spec, known_module_keys) + if let Some(resolved) = + resolve_rust_import(project_root, from_file, spec, known_module_keys) { return resolved; } diff --git a/crates/codegraph-mcp/src/connection.rs b/crates/codegraph-mcp/src/connection.rs index 
3e2426c7..e27a8f73 100644 --- a/crates/codegraph-mcp/src/connection.rs +++ b/crates/codegraph-mcp/src/connection.rs @@ -45,6 +45,8 @@ impl McpClientConfig { } } +type NotifyHandler = RwLock>>; + /// Core MCP connection supporting JSON-RPC 2.0 and MCP handshake pub struct McpConnection { #[allow(dead_code)] @@ -55,7 +57,7 @@ pub struct McpConnection { protocol: RwLock, pending: DashMap>, // request_id -> tx in_flight: AtomicU64, - notify_handler: RwLock>>, + notify_handler: NotifyHandler, } impl McpConnection { diff --git a/crates/codegraph-mcp/src/estimation.rs b/crates/codegraph-mcp/src/estimation.rs index 51fea342..49f39e4d 100644 --- a/crates/codegraph-mcp/src/estimation.rs +++ b/crates/codegraph-mcp/src/estimation.rs @@ -115,7 +115,7 @@ pub struct RepositoryEstimator { impl RepositoryEstimator { pub fn new(config: IndexerConfig) -> Self { Self { - parser: TreeSitterParser::new(), + parser: TreeSitterParser::default(), config, } } diff --git a/crates/codegraph-mcp/src/indexer.rs b/crates/codegraph-mcp/src/indexer.rs index e30167be..b7e7d879 100644 --- a/crates/codegraph-mcp/src/indexer.rs +++ b/crates/codegraph-mcp/src/indexer.rs @@ -203,9 +203,7 @@ pub(crate) fn filter_edges_for_tier( edges.retain(|edge| !matches!(edge.edge_type, EdgeType::References)); } codegraph_core::config_manager::IndexingTier::Fast => { - edges.retain(|edge| { - !matches!(edge.edge_type, EdgeType::Uses | EdgeType::References) - }); + edges.retain(|edge| !matches!(edge.edge_type, EdgeType::Uses | EdgeType::References)); } } before.saturating_sub(edges.len()) @@ -685,7 +683,7 @@ impl ProjectIndexer { } } - let parser = TreeSitterParser::new(); + let parser = TreeSitterParser::default(); let project_root = config.project_root.clone(); let (surreal, surreal_pool) = Self::connect_surreal_from_env().await?; let surreal_writer = SurrealWriterHandle::new(surreal_pool); @@ -978,10 +976,11 @@ impl ProjectIndexer { let analyzer_settings = AnalyzerSettings::for_tier(self.config.indexing_tier); let 
path_env = std::env::var("PATH").unwrap_or_default(); let mut analyzer_languages: HashSet = HashSet::new(); - let needs_language_scan = - analyzer_settings.lsp_enabled() || analyzer_settings.build_context || analyzer_settings.dataflow; + let needs_language_scan = analyzer_settings.lsp_enabled() + || analyzer_settings.build_context + || analyzer_settings.dataflow; if needs_language_scan { - let registry = codegraph_parser::LanguageRegistry::new(); + let registry = codegraph_parser::LanguageRegistry::default(); for (p, _) in &files_to_index { if let Some(lang) = registry.detect_language(&p.to_string_lossy()) { analyzer_languages.insert(lang); @@ -1106,8 +1105,7 @@ impl ProjectIndexer { let start = std::time::Instant::now(); info!( "🧠 Language-server analysis starting (mode: {}, languages: {:?})", - lsp_mode_label, - analyzer_languages + lsp_mode_label, analyzer_languages ); let project_root = self.project_root.clone(); @@ -1117,7 +1115,7 @@ impl ProjectIndexer { codegraph_core::Language, Vec, > = std::collections::HashMap::new(); - let registry = codegraph_parser::LanguageRegistry::new(); + let registry = codegraph_parser::LanguageRegistry::default(); for (p, _) in &files { if let Some(lang) = registry.detect_language(&p.to_string_lossy()) { language_files.entry(lang).or_default().push(p.clone()); @@ -1280,7 +1278,8 @@ impl ProjectIndexer { } let mut dataflow_stats = crate::analyzers::dataflow::DataflowStats::default(); - if analyzer_settings.dataflow && analyzer_languages.contains(&codegraph_core::Language::Rust) + if analyzer_settings.dataflow + && analyzer_languages.contains(&codegraph_core::Language::Rust) { let start = std::time::Instant::now(); info!("🌊 Dataflow enrichment starting (local def-use)"); @@ -1785,7 +1784,7 @@ impl ProjectIndexer { info!(" 🚀 M4 Max optimization: Parallel processing with bulk database operations"); // REVOLUTIONARY: Parallel symbol resolution optimized for M4 Max 128GB - let chunk_size = (edges.len() / 12).max(100).min(1000); // 
Optimal for 12+ cores + let chunk_size = (edges.len() / 12).clamp(100, 1000); // Optimal for 12+ cores let chunks: Vec<_> = edges.chunks(chunk_size).collect(); let total_chunks = chunks.len(); @@ -1878,16 +1877,12 @@ impl ProjectIndexer { (known_embeddings, unresolved_embeddings, node_degrees) }; #[cfg(not(feature = "ai-enhanced"))] - let (symbol_embeddings, unresolved_embeddings, node_degrees): ( - std::collections::HashMap>, - std::collections::HashMap>, - std::collections::HashMap, - ) = { + let (symbol_embeddings, unresolved_embeddings, node_degrees) = { info!("🚀 Pattern-only resolution: AI semantic matching disabled (ai-enhanced feature not enabled)"); ( - std::collections::HashMap::new(), - std::collections::HashMap::new(), - std::collections::HashMap::new(), + std::collections::HashMap::>::new(), + std::collections::HashMap::>::new(), + std::collections::HashMap::::new(), ) }; @@ -4215,10 +4210,8 @@ impl ProjectIndexer { nodes.iter().map(|n| n.id.to_string()).collect(); // Build symbol name to node ID map for edge resolution - let symbol_map: std::collections::HashMap = nodes - .iter() - .map(|n| (n.name.to_string(), n.id.clone())) - .collect(); + let symbol_map: std::collections::HashMap = + nodes.iter().map(|n| (n.name.to_string(), n.id)).collect(); // Convert EdgeRelationship to CodeEdge for intra-file edges only let resolved_edges: Vec = edges @@ -4230,7 +4223,7 @@ impl ProjectIndexer { Some( codegraph_graph::CodeEdge::new( edge_rel.from, - target_id.clone(), + *target_id, edge_rel.edge_type, ) .with_project_id(self.project_id.clone()), @@ -4481,7 +4474,8 @@ mod tests { #[test] fn analyzer_requires_rust_analyzer_when_lsp_enabled() { - let settings = AnalyzerSettings::for_tier(codegraph_core::config_manager::IndexingTier::Full); + let settings = + AnalyzerSettings::for_tier(codegraph_core::config_manager::IndexingTier::Full); let err = ProjectIndexer::validate_analyzer_tools( &[codegraph_core::Language::Rust], @@ -4531,7 +4525,9 @@ mod tests { ); 
assert_eq!(removed, 2); assert!(edges.iter().any(|e| e.edge_type == EdgeType::Calls)); - assert!(edges.iter().any(|e| e.edge_type == EdgeType::Other("flows_to".to_string()))); + assert!(edges + .iter() + .any(|e| e.edge_type == EdgeType::Other("flows_to".to_string()))); assert!(!edges.iter().any(|e| e.edge_type == EdgeType::Uses)); assert!(!edges.iter().any(|e| e.edge_type == EdgeType::References)); } diff --git a/crates/codegraph-mcp/tests/daemon_watch.rs b/crates/codegraph-mcp/tests/daemon_watch.rs index 2d4c4046..b3781304 100644 --- a/crates/codegraph-mcp/tests/daemon_watch.rs +++ b/crates/codegraph-mcp/tests/daemon_watch.rs @@ -33,11 +33,13 @@ async fn watch_updates_file_metadata_on_change() -> Result<()> { let file_path = project_dir.path().join("foo.rs"); fs::write(&file_path, "fn foo() {}\n").await?; - let mut config = IndexerConfig::default(); - config.project_root = project_dir.path().to_path_buf(); - config.languages = vec!["rust".to_string()]; - config.recursive = true; - config.force_reindex = true; + let config = IndexerConfig { + project_root: project_dir.path().to_path_buf(), + languages: vec!["rust".to_string()], + recursive: true, + force_reindex: true, + ..Default::default() + }; let global_config = CodeGraphConfig::default(); let progress = MultiProgress::with_draw_target(ProgressDrawTarget::hidden()); diff --git a/crates/codegraph-mcp/tests/graph_tools_smoke.rs b/crates/codegraph-mcp/tests/graph_tools_smoke.rs index b58dd5d3..536baa74 100644 --- a/crates/codegraph-mcp/tests/graph_tools_smoke.rs +++ b/crates/codegraph-mcp/tests/graph_tools_smoke.rs @@ -447,8 +447,8 @@ async fn test_semantic_search_nodes_via_chunks() { let mut unique_node_ids: std::collections::HashSet = std::collections::HashSet::new(); for (i, result) in results.iter().enumerate() { - let has_content = result.get("content").map_or(false, |c| !c.is_null()); - let has_node_id = result.get("node_id").map_or(false, |n| !n.is_null()); + let has_content = 
result.get("content").is_some_and(|c| !c.is_null()); + let has_node_id = result.get("node_id").is_some_and(|n| !n.is_null()); let has_outgoing = result.get("outgoing_edges").is_some(); let has_incoming = result.get("incoming_edges").is_some(); diff --git a/crates/codegraph-mcp/tests/integration_tests.rs b/crates/codegraph-mcp/tests/integration_tests.rs index f89513de..3d0a9716 100644 --- a/crates/codegraph-mcp/tests/integration_tests.rs +++ b/crates/codegraph-mcp/tests/integration_tests.rs @@ -122,7 +122,7 @@ async fn test_mcp_server_startup() { async fn test_language_support_comprehensive() { println!("🌍 Testing comprehensive language support..."); - let registry = codegraph_parser::LanguageRegistry::new(); + let registry = codegraph_parser::LanguageRegistry::default(); // Test all 11 supported languages let language_tests = vec![ diff --git a/crates/codegraph-parser/src/fast_ml/enhancer.rs b/crates/codegraph-parser/src/fast_ml/enhancer.rs index f56ef97b..c29a9b41 100644 --- a/crates/codegraph-parser/src/fast_ml/enhancer.rs +++ b/crates/codegraph-parser/src/fast_ml/enhancer.rs @@ -113,7 +113,7 @@ mod tests { }; let enhanced = enhancer.enhance(result, content); - assert!(enhanced.edges.len() > 0, "Should add pattern-based edges"); + assert!(!enhanced.edges.is_empty(), "Should add pattern-based edges"); } #[test] @@ -142,7 +142,7 @@ mod tests { let enhanced = enhancer.enhance(result, "fn foo() {}"); assert!( - enhanced.edges.len() >= 1, + !enhanced.edges.is_empty(), "Symbol resolver should preserve/augment edges" ); } @@ -157,6 +157,6 @@ mod tests { let enhanced = enhance_extraction(result, content); // Should work without panicking - assert!(enhanced.nodes.len() > 0); + assert!(!enhanced.nodes.is_empty()); } } diff --git a/crates/codegraph-parser/src/fast_ml/pattern_matcher.rs b/crates/codegraph-parser/src/fast_ml/pattern_matcher.rs index 60b7c84e..55a7e5a3 100644 --- a/crates/codegraph-parser/src/fast_ml/pattern_matcher.rs +++ 
b/crates/codegraph-parser/src/fast_ml/pattern_matcher.rs @@ -300,7 +300,7 @@ mod tests { let enhanced = matcher.enhance_extraction(result, content); println!("Edges: {:?}", enhanced.edges.len()); assert!( - enhanced.edges.len() > 0, + !enhanced.edges.is_empty(), "Should have found Rust patterns in content" ); } diff --git a/crates/codegraph-parser/src/file_collect.rs b/crates/codegraph-parser/src/file_collect.rs index 7ae655a8..a56adf44 100644 --- a/crates/codegraph-parser/src/file_collect.rs +++ b/crates/codegraph-parser/src/file_collect.rs @@ -129,11 +129,7 @@ pub fn collect_source_files_with_config( filtered_files += 1; // Size extraction (best-effort) - let size = dent - .metadata() - .ok() - .and_then(|m| Some(m.len())) - .unwrap_or(0); + let size = dent.metadata().ok().map(|m| m.len()).unwrap_or(0); paths.push((path.to_path_buf(), size)); } diff --git a/crates/codegraph-parser/src/language.rs b/crates/codegraph-parser/src/language.rs index 183a87aa..70982dec 100644 --- a/crates/codegraph-parser/src/language.rs +++ b/crates/codegraph-parser/src/language.rs @@ -13,8 +13,8 @@ pub struct LanguageRegistry { configs: HashMap, } -impl LanguageRegistry { - pub fn new() -> Self { +impl Default for LanguageRegistry { + fn default() -> Self { let mut configs = HashMap::new(); configs.insert( @@ -125,6 +125,12 @@ impl LanguageRegistry { Self { configs } } +} + +impl LanguageRegistry { + pub fn new() -> Self { + Self::default() + } pub fn detect_language(&self, file_path: &str) -> Option { let extension = std::path::Path::new(file_path).extension()?.to_str()?; @@ -157,7 +163,7 @@ mod tests { #[test] fn registered_languages_use_supported_versions() { - let registry = LanguageRegistry::new(); + let registry = LanguageRegistry::default(); for (language, config) in ®istry.configs { let version = config.language.abi_version(); assert!( diff --git a/crates/codegraph-parser/src/languages/cpp.rs b/crates/codegraph-parser/src/languages/cpp.rs index 79e36ad6..81a189ba 100644 --- 
a/crates/codegraph-parser/src/languages/cpp.rs +++ b/crates/codegraph-parser/src/languages/cpp.rs @@ -311,7 +311,7 @@ impl<'a> CppCollector<'a> { code.metadata .attributes .insert("kind".into(), "destructor".into()); - } else if ctx.current_class.as_ref().map_or(false, |c| c == &name) { + } else if ctx.current_class.as_ref() == Some(&name) { code.metadata .attributes .insert("kind".into(), "constructor".into()); diff --git a/crates/codegraph-parser/src/languages/extractor_utils.rs b/crates/codegraph-parser/src/languages/extractor_utils.rs index 1b84c0c2..60f37038 100644 --- a/crates/codegraph-parser/src/languages/extractor_utils.rs +++ b/crates/codegraph-parser/src/languages/extractor_utils.rs @@ -57,7 +57,7 @@ pub fn children_by_kind<'a>(node: &Node<'a>, kind: &str) -> Vec> { } /// Get text of a child by field name -pub fn child_text_by_field<'a>(node: &Node, field_name: &str, content: &'a str) -> Option { +pub fn child_text_by_field(node: &Node, field_name: &str, content: &str) -> Option { node.child_by_field_name(field_name) .map(|child| node_text(&child, content).to_string()) } diff --git a/crates/codegraph-parser/src/languages/go.rs b/crates/codegraph-parser/src/languages/go.rs index 6c7536ee..854a8bc7 100644 --- a/crates/codegraph-parser/src/languages/go.rs +++ b/crates/codegraph-parser/src/languages/go.rs @@ -177,7 +177,7 @@ impl<'a> GoCollector<'a> { } // Detect exported functions (capitalized) - if name.chars().next().map_or(false, |c| c.is_uppercase()) { + if name.chars().next().is_some_and(|c| c.is_uppercase()) { code.metadata .attributes .insert("exported".into(), "true".into()); @@ -225,7 +225,7 @@ impl<'a> GoCollector<'a> { } // Detect exported methods - if name.chars().next().map_or(false, |c| c.is_uppercase()) { + if name.chars().next().is_some_and(|c| c.is_uppercase()) { code.metadata .attributes .insert("exported".into(), "true".into()); @@ -379,7 +379,7 @@ impl<'a> GoCollector<'a> { code.span = Some(self.span_for(node)); // Detect exported types 
- if name.chars().next().map_or(false, |c| c.is_uppercase()) { + if name.chars().next().is_some_and(|c| c.is_uppercase()) { code.metadata .attributes .insert("exported".into(), "true".into()); diff --git a/crates/codegraph-parser/src/languages/php.rs b/crates/codegraph-parser/src/languages/php.rs index d9f84108..3d97f2cc 100644 --- a/crates/codegraph-parser/src/languages/php.rs +++ b/crates/codegraph-parser/src/languages/php.rs @@ -43,8 +43,10 @@ impl PhpExtractor { || file_path.contains("Controller.php") || file_path.contains("Model.php"); - let mut ctx = PhpContext::default(); - ctx.is_framework_file = is_framework; + let ctx = PhpContext { + is_framework_file: is_framework, + ..Default::default() + }; collector.walk(&mut cursor, ctx); collector.into_nodes() @@ -61,8 +63,10 @@ impl PhpExtractor { || file_path.contains("Controller.php") || file_path.contains("Model.php"); - let mut ctx = PhpContext::default(); - ctx.is_framework_file = is_framework; + let ctx = PhpContext { + is_framework_file: is_framework, + ..Default::default() + }; collector.walk(&mut cursor, ctx); collector.into_result() diff --git a/crates/codegraph-parser/src/languages/ruby.rs b/crates/codegraph-parser/src/languages/ruby.rs index 7a0d2ab1..e9131b36 100644 --- a/crates/codegraph-parser/src/languages/ruby.rs +++ b/crates/codegraph-parser/src/languages/ruby.rs @@ -41,8 +41,10 @@ impl RubyExtractor { || file_path.contains("/config/") || file_path.contains("/db/migrate"); - let mut ctx = RubyContext::default(); - ctx.is_rails_file = is_rails; + let ctx = RubyContext { + is_rails_file: is_rails, + ..Default::default() + }; collector.walk(&mut cursor, ctx); collector.into_nodes() @@ -58,8 +60,10 @@ impl RubyExtractor { || file_path.contains("/config/") || file_path.contains("/db/migrate"); - let mut ctx = RubyContext::default(); - ctx.is_rails_file = is_rails; + let ctx = RubyContext { + is_rails_file: is_rails, + ..Default::default() + }; collector.walk(&mut cursor, ctx); collector.into_result() 
@@ -222,16 +226,15 @@ impl<'a> RubyCollector<'a> { && ctx .current_class .as_ref() - .map_or(false, |c| c.ends_with("Controller")) - { - if matches!( + .is_some_and(|c| c.ends_with("Controller")) + && matches!( name.as_str(), "index" | "show" | "new" | "create" | "edit" | "update" | "destroy" - ) { - code.metadata - .attributes - .insert("rails_action".into(), "true".into()); - } + ) + { + code.metadata + .attributes + .insert("rails_action".into(), "true".into()); } // Detect metaprogramming patterns diff --git a/crates/codegraph-parser/src/languages/rust.rs b/crates/codegraph-parser/src/languages/rust.rs index 7ad71755..3269e2c7 100644 --- a/crates/codegraph-parser/src/languages/rust.rs +++ b/crates/codegraph-parser/src/languages/rust.rs @@ -287,7 +287,7 @@ impl<'a> Collector<'a> { code.metadata .attributes .insert("impl_for".into(), for_type.clone()); - + // Add reference to the 'for' type if let Some(type_node) = node.child_by_field_name("type") { self.edges.push(EdgeRelationship { @@ -303,7 +303,7 @@ impl<'a> Collector<'a> { code.metadata .attributes .insert("impl_trait".into(), trait_name.clone()); - + // Add reference to the trait if let Some(trait_node) = node.child_by_field_name("trait") { self.edges.push(EdgeRelationship { @@ -371,7 +371,7 @@ impl<'a> Collector<'a> { code.metadata .attributes .insert("lifetimes".into(), json!(lifetimes).to_string()); - + // REVOLUTIONARY: Extract references from parameters and return type self.extract_references_from_signature(node, code.id); @@ -539,11 +539,8 @@ impl<'a> Collector<'a> { if cursor.goto_first_child() { loop { let n = cursor.node(); - match n.kind() { - "type_parameters" => { - self.collect_type_parameters(n, &mut generics, &mut lifetimes); - } - _ => {} + if n.kind() == "type_parameters" { + self.collect_type_parameters(n, &mut generics, &mut lifetimes); } if !cursor.goto_next_sibling() { break; @@ -854,7 +851,7 @@ fn parse_impl_signature_text(text: &str) -> ImplInfo { if let Some(idx) = s.find(" for ") { 
// trait impl let head = s.trim_start_matches("impl").trim(); - let trait_part = head[..idx - 0].trim(); + let trait_part = head[..idx].trim(); let after_for = &s[idx + 5..]; let ty = after_for.split('{').next().unwrap_or(after_for).trim(); info.trait_name = Some(trait_part.to_string()); diff --git a/crates/codegraph-parser/src/parser.rs b/crates/codegraph-parser/src/parser.rs index f7e2fda8..f9338fa6 100644 --- a/crates/codegraph-parser/src/parser.rs +++ b/crates/codegraph-parser/src/parser.rs @@ -49,17 +49,23 @@ pub struct TreeSitterParser { parser_pool: Arc>>>, } -impl TreeSitterParser { - pub fn new() -> Self { +impl Default for TreeSitterParser { + fn default() -> Self { let num_cpus = num_cpus::get(); Self { - registry: Arc::new(LanguageRegistry::new()), + registry: Arc::new(LanguageRegistry::default()), max_concurrent_files: num_cpus * 2, chunk_size: 50, parsed_cache: Arc::new(dashmap::DashMap::new()), parser_pool: Arc::new(parking_lot::Mutex::new(Vec::new())), } } +} + +impl TreeSitterParser { + pub fn new() -> Self { + Self::default() + } pub fn with_concurrency(mut self, max_concurrent_files: usize) -> Self { self.max_concurrent_files = max_concurrent_files; @@ -318,10 +324,8 @@ impl TreeSitterParser { async fn parse_file_with_caching(&self, file_path: &str) -> Result<(Vec, usize)> { let path = Path::new(file_path); - let metadata = fs::metadata(path) - .await - .map_err(|e| CodeGraphError::Io(e))?; - let last_modified = metadata.modified().map_err(|e| CodeGraphError::Io(e))?; + let metadata = fs::metadata(path).await.map_err(CodeGraphError::Io)?; + let last_modified = metadata.modified().map_err(CodeGraphError::Io)?; // Check cache first if let Some(cached) = self.parsed_cache.get(file_path) { @@ -350,7 +354,7 @@ impl TreeSitterParser { .registry .detect_language(file_path) .unwrap_or(Language::Other("unknown".to_string())); - let content_hash = format!("{:x}", sha2::Sha256::digest(&content)); + let content_hash = format!("{:x}", 
sha2::Sha256::digest(content)); // Enable tree caching for better performance let cached_tree = if content.len() < 500_000 { @@ -359,13 +363,11 @@ impl TreeSitterParser { if let Ok((nodes, _, _)) = &result { if !nodes.is_empty() { // Parse again just for caching (small performance cost for future gains) - let mut cache_parser = self - .registry - .create_parser(&language) - .unwrap_or_else(|| tree_sitter::Parser::new()); + let mut cache_parser = + self.registry.create_parser(&language).unwrap_or_default(); if let Some(config) = self.registry.get_config(&language) { if cache_parser.set_language(&config.language).is_ok() { - cache_parser.parse(&content, None) + cache_parser.parse(content, None) } else { None } @@ -409,7 +411,7 @@ impl TreeSitterParser { let content = read_file_to_string(file_path) .await - .map_err(|e| CodeGraphError::Io(e))?; + .map_err(CodeGraphError::Io)?; let line_count = content.lines().count(); let nodes = self @@ -783,7 +785,7 @@ impl TreeSitterParser { let content = read_file_to_string(file_path) .await - .map_err(|e| CodeGraphError::Io(e))?; + .map_err(CodeGraphError::Io)?; self.parse_content_with_unified_extraction(&content, file_path, language) .await @@ -822,11 +824,8 @@ impl TreeSitterParser { } } - found_parser.unwrap_or_else(|| { - registry - .create_parser(&language) - .unwrap_or_else(|| tree_sitter::Parser::new()) - }) + found_parser + .unwrap_or_else(|| registry.create_parser(&language).unwrap_or_default()) }; // Ensure parser has correct language set diff --git a/crates/codegraph-parser/src/tests/mod.rs b/crates/codegraph-parser/src/tests/mod.rs index 15fdebfc..c5ea42aa 100644 --- a/crates/codegraph-parser/src/tests/mod.rs +++ b/crates/codegraph-parser/src/tests/mod.rs @@ -8,9 +8,6 @@ use tree_sitter::Parser; #[cfg(test)] mod integration_tests { use super::*; - use tree_sitter_python; - use tree_sitter_rust; - use tree_sitter_typescript; #[test] fn test_full_rust_conversion() { diff --git a/crates/codegraph-parser/src/visitor.rs 
b/crates/codegraph-parser/src/visitor.rs index 719585cb..ea31c1ba 100644 --- a/crates/codegraph-parser/src/visitor.rs +++ b/crates/codegraph-parser/src/visitor.rs @@ -187,7 +187,7 @@ impl AstToGraphConverter { }); } - if content.contains(&format!("{}", other_name)) + if content.contains(&other_name.to_string()) && matches!( other_entity.node.node_type, Some(NodeType::Variable) @@ -232,31 +232,31 @@ impl AstToGraphConverter { } } } - } else if matches!(self.language, Language::TypeScript | Language::JavaScript) { - if content.contains("extends ") || content.contains("implements ") { - for other_entity in &self.entities { - if content.contains(&format!("extends {}", other_entity.symbol_name)) { - self.relationships.push(SemanticRelationship { - from: entity.node.id, - to: other_entity.node.id, - edge_type: EdgeType::Extends, - context: format!( - "Class {} extends {}", - entity.symbol_name, other_entity.symbol_name - ), - }); - } - if content.contains(&format!("implements {}", other_entity.symbol_name)) { - self.relationships.push(SemanticRelationship { - from: entity.node.id, - to: other_entity.node.id, - edge_type: EdgeType::Implements, - context: format!( - "Class {} implements {}", - entity.symbol_name, other_entity.symbol_name - ), - }); - } + } else if matches!(self.language, Language::TypeScript | Language::JavaScript) + && (content.contains("extends ") || content.contains("implements ")) + { + for other_entity in &self.entities { + if content.contains(&format!("extends {}", other_entity.symbol_name)) { + self.relationships.push(SemanticRelationship { + from: entity.node.id, + to: other_entity.node.id, + edge_type: EdgeType::Extends, + context: format!( + "Class {} extends {}", + entity.symbol_name, other_entity.symbol_name + ), + }); + } + if content.contains(&format!("implements {}", other_entity.symbol_name)) { + self.relationships.push(SemanticRelationship { + from: entity.node.id, + to: other_entity.node.id, + edge_type: EdgeType::Implements, + context: 
format!( + "Class {} implements {}", + entity.symbol_name, other_entity.symbol_name + ), + }); } } } @@ -543,8 +543,8 @@ impl AstToGraphConverter { } fn node_text(&self, node: &Node) -> SharedStr { - let start = node.start_byte() as usize; - let end = node.end_byte() as usize; + let start = node.start_byte(); + let end = node.end_byte(); SharedStr::from_arc_slice(self.source_bytes.clone(), start, end) } diff --git a/crates/codegraph-parser/src/watcher.rs b/crates/codegraph-parser/src/watcher.rs index 123b6588..e48a40c0 100644 --- a/crates/codegraph-parser/src/watcher.rs +++ b/crates/codegraph-parser/src/watcher.rs @@ -77,7 +77,7 @@ impl FileSystemWatcher { Ok(Self { watcher: None, file_registry: Arc::new(DashMap::new()), - language_registry: Arc::new(LanguageRegistry::new()), + language_registry: Arc::new(LanguageRegistry::default()), event_sender, event_receiver, watched_directories: Arc::new(RwLock::new(HashSet::new())), @@ -274,6 +274,7 @@ impl FileSystemWatcher { }) } + #[allow(clippy::too_many_arguments)] fn handle_fs_event( event: Event, event_sender: Sender, @@ -556,10 +557,7 @@ impl FileSystemWatcher { Err(_) => { // No immediate changes, wait a bit tokio::time::sleep(Duration::from_millis(1)).await; - match self.event_receiver.try_recv() { - Ok(change) => Some(change), - Err(_) => None, - } + self.event_receiver.try_recv().ok() } } } diff --git a/crates/codegraph-vector/examples/rag_demo.rs b/crates/codegraph-vector/examples/rag_demo.rs index a7d34cf0..d9244a7a 100644 --- a/crates/codegraph-vector/examples/rag_demo.rs +++ b/crates/codegraph-vector/examples/rag_demo.rs @@ -20,15 +20,15 @@ async fn main() -> Result<(), Box> { for node in &nodes { rag_system.add_context(node.clone()).await?; println!( - " Added: {} ({})", + " Added: {} ({:?})", node.name, - format!("{:?}", node.node_type.as_ref().unwrap()) + node.node_type.as_ref().unwrap() ); } // Test queries println!("\n3. 
Testing RAG queries..."); - let test_queries = vec![ + let test_queries = [ "How do I read files?", "Find functions that handle errors", "What are the async operations available?", diff --git a/crates/codegraph-vector/src/cache.rs b/crates/codegraph-vector/src/cache.rs index f2dafe5a..be596584 100644 --- a/crates/codegraph-vector/src/cache.rs +++ b/crates/codegraph-vector/src/cache.rs @@ -110,16 +110,14 @@ where V: Clone + Send + Sync + 'static, { pub fn new(config: CacheConfig) -> Self { - let cache = Self { + Self { data: Arc::new(DashMap::new()), frequency: Arc::new(DashMap::new()), access_order: Arc::new(Mutex::new(VecDeque::new())), config: config.clone(), stats: Arc::new(RwLock::new(CacheStats::new())), cleanup_handle: None, - }; - - cache + } } pub fn start_cleanup_task(&mut self) { diff --git a/crates/codegraph-vector/src/embedding.rs b/crates/codegraph-vector/src/embedding.rs index 7e99f13c..b0406c58 100644 --- a/crates/codegraph-vector/src/embedding.rs +++ b/crates/codegraph-vector/src/embedding.rs @@ -706,9 +706,9 @@ impl EmbeddingGenerator { let hash = simple_hash(&text); let mut rng_state = hash; - for i in 0..dimension { + for e in embedding.iter_mut() { rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345); - embedding[i] = ((rng_state as f32 / u32::MAX as f32) - 0.5) * 2.0; + *e = ((rng_state as f32 / u32::MAX as f32) - 0.5) * 2.0; } let norm: f32 = embedding.iter().map(|x| x * x).sum::().sqrt(); diff --git a/crates/codegraph-vector/src/embeddings/generator.rs b/crates/codegraph-vector/src/embeddings/generator.rs index 9b739b48..8f3a9d03 100644 --- a/crates/codegraph-vector/src/embeddings/generator.rs +++ b/crates/codegraph-vector/src/embeddings/generator.rs @@ -570,9 +570,9 @@ impl AdvancedEmbeddingGenerator { h = h.wrapping_mul(33).wrapping_add(b as u32); } let mut state = h; - for i in 0..dim { + for o in out.iter_mut() { state = state.wrapping_mul(1103515245).wrapping_add(12345); - out[i] = ((state as f32 / u32::MAX as f32) - 0.5) * 
2.0; + *o = ((state as f32 / u32::MAX as f32) - 0.5) * 2.0; } let norm: f32 = out.iter().map(|x| x * x).sum::().sqrt(); if norm > 0.0 { diff --git a/crates/codegraph-vector/src/insights_generator.rs b/crates/codegraph-vector/src/insights_generator.rs index 5c481c3d..7846194b 100644 --- a/crates/codegraph-vector/src/insights_generator.rs +++ b/crates/codegraph-vector/src/insights_generator.rs @@ -6,10 +6,11 @@ use std::sync::Arc; use tracing::{info, warn}; /// Mode for insights generation -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] pub enum InsightsMode { /// Fast mode: Return context only, no LLM processing /// Best for agent-based workflows (Claude, GPT-4, etc.) + #[default] ContextOnly, /// Balanced mode: Use reranking + lightweight LLM @@ -21,12 +22,6 @@ pub enum InsightsMode { Deep, } -impl Default for InsightsMode { - fn default() -> Self { - Self::ContextOnly // Default to fastest mode - } -} - /// Configuration for insights generation #[derive(Debug, Clone, Serialize, Deserialize)] pub struct InsightsConfig { diff --git a/crates/codegraph-vector/src/lmstudio_embedding_provider.rs b/crates/codegraph-vector/src/lmstudio_embedding_provider.rs index b408f38d..8d742ff2 100644 --- a/crates/codegraph-vector/src/lmstudio_embedding_provider.rs +++ b/crates/codegraph-vector/src/lmstudio_embedding_provider.rs @@ -28,6 +28,19 @@ pub struct LmStudioEmbeddingConfig { pub max_tokens_per_request: usize, } +impl LmStudioEmbeddingConfig { + pub fn from_env() -> Self { + let mut config = Self::default(); + if let Ok(model) = std::env::var("CODEGRAPH_LMSTUDIO_MODEL") { + config.model = model; + } + if let Ok(url) = std::env::var("CODEGRAPH_LMSTUDIO_URL") { + config.api_base = url; + } + config + } +} + impl Default for LmStudioEmbeddingConfig { fn default() -> Self { Self { diff --git a/crates/codegraph-vector/src/ollama_embedding_provider.rs 
b/crates/codegraph-vector/src/ollama_embedding_provider.rs index 5115b940..4a30cc09 100644 --- a/crates/codegraph-vector/src/ollama_embedding_provider.rs +++ b/crates/codegraph-vector/src/ollama_embedding_provider.rs @@ -164,7 +164,7 @@ impl OllamaEmbeddingProvider { let response = timeout( Duration::from_secs(5), self.client - .get(&format!("{}/api/tags", self.config.base_url)) + .get(format!("{}/api/tags", self.config.base_url)) .send(), ) .await @@ -471,74 +471,9 @@ where } } - Ok(out - .into_iter() + out.into_iter() .map(|v| v.ok_or_else(|| CodeGraphError::Vector("Missing embedding result".to_string()))) - .collect::>>()?) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn detects_context_overflow_messages() { - assert!(OllamaEmbeddingProvider::is_context_overflow_message( - "Ollama embedding API error: {\"error\":\"the input length exceeds the context length\"}" - )); - assert!(OllamaEmbeddingProvider::is_context_overflow_message( - "maximum context length exceeded" - )); - } - - #[tokio::test] - async fn embed_resilient_with_splits_on_overflow_and_preserves_order() -> Result<()> { - let texts: Vec = (0..10).map(|i| "x".repeat((i + 1) * 10)).collect(); - - // Fail when total chars in the request exceed 120. 
- let embed_once = |slice: &[String]| { - let lengths: Vec = slice.iter().map(|s| s.len()).collect(); - async move { - let total: usize = lengths.iter().sum(); - if total > 120 { - return Err(CodeGraphError::External( - "the input length exceeds the context length".to_string(), - )); - } - Ok(lengths - .into_iter() - .map(|len| vec![len as f32]) - .collect::>()) - } - }; - - let embeddings = embed_resilient_with(&texts, embed_once).await?; - assert_eq!(embeddings.len(), texts.len()); - for (i, emb) in embeddings.iter().enumerate() { - assert_eq!(emb.len(), 1); - assert_eq!(emb[0], texts[i].len() as f32); - } - Ok(()) - } - - #[tokio::test] - async fn embed_resilient_with_does_not_split_on_non_overflow_error() { - let texts: Vec = (0..4).map(|_| "x".to_string()).collect(); - let calls = std::sync::Arc::new(std::sync::Mutex::new(0usize)); - let calls_clone = calls.clone(); - - let embed_once = move |_slice: &[String]| { - let calls_inner = calls_clone.clone(); - async move { - *calls_inner.lock().unwrap() += 1; - Err(CodeGraphError::External("some other error".to_string())) - } - }; - - let res = embed_resilient_with(&texts, embed_once).await; - assert!(res.is_err()); - assert_eq!(*calls.lock().unwrap(), 1); - } + .collect::>>() } #[async_trait] @@ -689,8 +624,10 @@ pub fn create_ollama_provider() -> OllamaEmbeddingProvider { /// Create Ollama embedding provider with custom model pub fn create_ollama_provider_with_model(model_name: String) -> OllamaEmbeddingProvider { - let mut config = OllamaEmbeddingConfig::default(); - config.model_name = model_name; + let config = OllamaEmbeddingConfig { + model_name, + ..Default::default() + }; OllamaEmbeddingProvider::new(config) } @@ -719,3 +656,67 @@ fn infer_dimension_for_model(model: &str) -> usize { 768 } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn detects_context_overflow_messages() { + assert!(OllamaEmbeddingProvider::is_context_overflow_message( + "Ollama embedding API error: {\"error\":\"the input 
length exceeds the context length\"}" + )); + assert!(OllamaEmbeddingProvider::is_context_overflow_message( + "maximum context length exceeded" + )); + } + + #[tokio::test] + async fn embed_resilient_with_splits_on_overflow_and_preserves_order() -> Result<()> { + let texts: Vec = (0..10).map(|i| "x".repeat((i + 1) * 10)).collect(); + + // Fail when total chars in the request exceed 120. + let embed_once = |slice: &[String]| { + let lengths: Vec = slice.iter().map(|s| s.len()).collect(); + async move { + let total: usize = lengths.iter().sum(); + if total > 120 { + return Err(CodeGraphError::External( + "the input length exceeds the context length".to_string(), + )); + } + Ok(lengths + .into_iter() + .map(|len| vec![len as f32]) + .collect::>()) + } + }; + + let embeddings = embed_resilient_with(&texts, embed_once).await?; + assert_eq!(embeddings.len(), texts.len()); + for (i, emb) in embeddings.iter().enumerate() { + assert_eq!(emb.len(), 1); + assert_eq!(emb[0], texts[i].len() as f32); + } + Ok(()) + } + + #[tokio::test] + async fn embed_resilient_with_does_not_split_on_non_overflow_error() { + let texts: Vec = (0..4).map(|_| "x".to_string()).collect(); + let calls = std::sync::Arc::new(std::sync::Mutex::new(0usize)); + let calls_clone = calls.clone(); + + let embed_once = move |_slice: &[String]| { + let calls_inner = calls_clone.clone(); + async move { + *calls_inner.lock().unwrap() += 1; + Err(CodeGraphError::External("some other error".to_string())) + } + }; + + let res = embed_resilient_with(&texts, embed_once).await; + assert!(res.is_err()); + assert_eq!(*calls.lock().unwrap(), 1); + } +} diff --git a/crates/codegraph-vector/src/prep/chunker.rs b/crates/codegraph-vector/src/prep/chunker.rs index 1963145b..d4d36088 100644 --- a/crates/codegraph-vector/src/prep/chunker.rs +++ b/crates/codegraph-vector/src/prep/chunker.rs @@ -186,9 +186,8 @@ pub fn build_chunk_plan_with_sources( raw_chunks.extend(chunker.chunk(&segment)); } let mut overlap_tail: Option = None; 
- let mut chunk_idx = 0; - for chunk_text in raw_chunks { + for (chunk_idx, chunk_text) in raw_chunks.into_iter().enumerate() { let mut text = chunk_text; if let Some(tail) = &overlap_tail { @@ -214,8 +213,6 @@ pub fn build_chunk_plan_with_sources( node_name: node.name.to_string(), }); - chunk_idx += 1; - // Capture tail for next chunk (approximate overlap using chars, UTF-8 safe) if config.overlap_tokens > 0 { let approx_chars = config.overlap_tokens * 4; diff --git a/crates/codegraph-vector/src/providers.rs b/crates/codegraph-vector/src/providers.rs index d9c90d55..f5cdf759 100644 --- a/crates/codegraph-vector/src/providers.rs +++ b/crates/codegraph-vector/src/providers.rs @@ -130,7 +130,7 @@ impl HybridEmbeddingPipeline { primary, fallbacks: Vec::new(), strategy, - health_checker: ProviderHealthChecker::new(), + health_checker: ProviderHealthChecker::default(), } } @@ -255,16 +255,13 @@ impl EmbeddingProvider for HybridEmbeddingPipeline { } /// Health checker to track provider reliability over time +#[derive(Default)] pub struct ProviderHealthChecker { // Implementation for tracking provider health metrics // This would maintain success/failure rates, response times, etc. 
} impl ProviderHealthChecker { - pub fn new() -> Self { - Self {} - } - pub async fn select_most_reliable<'a>( &self, primary: &'a dyn EmbeddingProvider, diff --git a/crates/codegraph-vector/src/rag/context_retriever.rs b/crates/codegraph-vector/src/rag/context_retriever.rs index 785a9f69..afe772a0 100644 --- a/crates/codegraph-vector/src/rag/context_retriever.rs +++ b/crates/codegraph-vector/src/rag/context_retriever.rs @@ -319,7 +319,7 @@ impl ContextRetriever { else if node .content .as_ref() - .map_or(false, |c| c.to_lowercase().contains(&keyword_lower)) + .is_some_and(|c| c.to_lowercase().contains(&keyword_lower)) { matches += 1; total_weight += 2.0; diff --git a/crates/codegraph-vector/src/rag/query_processor.rs b/crates/codegraph-vector/src/rag/query_processor.rs index b05eeafc..a6080691 100644 --- a/crates/codegraph-vector/src/rag/query_processor.rs +++ b/crates/codegraph-vector/src/rag/query_processor.rs @@ -307,9 +307,9 @@ impl QueryProcessor { let hash = simple_hash(&query); let mut rng_state = hash; - for i in 0..dimension { + for e in embedding.iter_mut() { rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345); - embedding[i] = ((rng_state as f32 / u32::MAX as f32) - 0.5) * 2.0; + *e = ((rng_state as f32 / u32::MAX as f32) - 0.5) * 2.0; } // Normalize embedding diff --git a/crates/codegraph-vector/src/rag/response_generator.rs b/crates/codegraph-vector/src/rag/response_generator.rs index 1d5dfdd2..4cd1164c 100644 --- a/crates/codegraph-vector/src/rag/response_generator.rs +++ b/crates/codegraph-vector/src/rag/response_generator.rs @@ -124,7 +124,7 @@ impl ResponseGenerator { let validation_passed = if self.config.enable_answer_validation { let context_texts: Vec<&str> = selected_sources .iter() - .filter_map(|s| Some(s.retrieval_result.context_snippet.as_str())) + .map(|s| s.retrieval_result.context_snippet.as_str()) .collect(); self.validate_answer(&answer, query, &context_texts).await? 
} else { @@ -340,15 +340,12 @@ impl ResponseGenerator { if let Some(ref node) = source.retrieval_result.node { let part = if self.config.include_code_examples && node.content.is_some() { format!( - "{}. **{}** ({}): {}", + "{}. **{}** ({:?}): {}", i + 1, node.name.as_str(), - format!( - "{:?}", - node.node_type - .as_ref() - .unwrap_or(&codegraph_core::NodeType::Other("unknown".to_string())) - ), + node.node_type + .as_ref() + .unwrap_or(&codegraph_core::NodeType::Other("unknown".to_string())), source.retrieval_result.context_snippet ) } else { diff --git a/crates/codegraph-vector/src/rag/result_ranker.rs b/crates/codegraph-vector/src/rag/result_ranker.rs index 89c6dd70..d762d30c 100644 --- a/crates/codegraph-vector/src/rag/result_ranker.rs +++ b/crates/codegraph-vector/src/rag/result_ranker.rs @@ -129,7 +129,7 @@ impl ResultRanker { pub async fn rank_by_semantic_similarity( &mut self, - results: &mut Vec<(String, f32)>, + results: &mut [(String, f32)], query: &str, ) -> Result<()> { if results.is_empty() { @@ -404,9 +404,9 @@ impl ResultRanker { let hash = simple_hash(&query); let mut rng_state = hash; - for i in 0..dimension { + for e in embedding.iter_mut() { rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345); - embedding[i] = ((rng_state as f32 / u32::MAX as f32) - 0.5) * 2.0; + *e = ((rng_state as f32 / u32::MAX as f32) - 0.5) * 2.0; } // Normalize embedding diff --git a/crates/codegraph-vector/src/reranking/ollama.rs b/crates/codegraph-vector/src/reranking/ollama.rs index 5c462417..94febdb6 100644 --- a/crates/codegraph-vector/src/reranking/ollama.rs +++ b/crates/codegraph-vector/src/reranking/ollama.rs @@ -200,7 +200,7 @@ Scores:"#, let request_result = tokio::time::timeout( self.timeout, self.client - .post(&format!("{}/api/chat", self.api_base)) + .post(format!("{}/api/chat", self.api_base)) .header("Content-Type", "application/json") .json(&request) .send(), diff --git a/crates/codegraph-vector/src/search.rs 
b/crates/codegraph-vector/src/search.rs index d0212698..c04e4aec 100644 --- a/crates/codegraph-vector/src/search.rs +++ b/crates/codegraph-vector/src/search.rs @@ -185,9 +185,9 @@ impl SemanticSearch { let hash = simple_hash(&query); let mut rng_state = hash; - for i in 0..dimension { + for e in embedding.iter_mut() { rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345); - embedding[i] = ((rng_state as f32 / u32::MAX as f32) - 0.5) * 2.0; + *e = ((rng_state as f32 / u32::MAX as f32) - 0.5) * 2.0; } let norm: f32 = embedding.iter().map(|x| x * x).sum::().sqrt(); diff --git a/crates/codegraph-vector/src/simd_ops.rs b/crates/codegraph-vector/src/simd_ops.rs index 24916162..74522243 100644 --- a/crates/codegraph-vector/src/simd_ops.rs +++ b/crates/codegraph-vector/src/simd_ops.rs @@ -295,6 +295,7 @@ impl SIMDVectorOps { } #[cfg(not(target_arch = "x86_64"))] + #[allow(clippy::missing_safety_doc)] pub unsafe fn cosine_similarity_avx2(_a: &[f32], _b: &[f32]) -> Result { Err( crate::VectorError::SimdError("AVX2 not supported on this architecture".to_string()) @@ -303,6 +304,7 @@ impl SIMDVectorOps { } #[cfg(not(target_arch = "x86_64"))] + #[allow(clippy::missing_safety_doc)] pub unsafe fn batch_cosine_similarity_avx2( _query: &[f32], _embeddings: &[&[f32]], @@ -315,6 +317,7 @@ impl SIMDVectorOps { } #[cfg(not(target_arch = "x86_64"))] + #[allow(clippy::missing_safety_doc)] pub unsafe fn l2_distance_avx2(_a: &[f32], _b: &[f32]) -> Result { Err( crate::VectorError::SimdError("AVX2 not supported on this architecture".to_string()) @@ -323,6 +326,7 @@ impl SIMDVectorOps { } #[cfg(not(target_arch = "x86_64"))] + #[allow(clippy::missing_safety_doc)] pub unsafe fn dot_product_avx2(_a: &[f32], _b: &[f32]) -> Result { Err( crate::VectorError::SimdError("AVX2 not supported on this architecture".to_string()) @@ -331,6 +335,7 @@ impl SIMDVectorOps { } #[cfg(not(target_arch = "x86_64"))] + #[allow(clippy::missing_safety_doc)] pub unsafe fn normalize_avx2(_vector: &mut 
[f32]) -> Result<()> { Err( crate::VectorError::SimdError("AVX2 not supported on this architecture".to_string()) @@ -455,7 +460,7 @@ mod tests { println!("Adaptive similarity result: {}", result); // Should be valid similarity score - assert!(result >= -1.0 && result <= 1.0); + assert!((-1.0..=1.0).contains(&result)); } #[test] diff --git a/crates/codegraph-vector/tests/embedding_provider_tests.rs b/crates/codegraph-vector/tests/embedding_provider_tests.rs index f46c012e..3913fd31 100644 --- a/crates/codegraph-vector/tests/embedding_provider_tests.rs +++ b/crates/codegraph-vector/tests/embedding_provider_tests.rs @@ -69,9 +69,9 @@ impl EmbeddingProvider for MockEmbeddingProvider { let mut embedding = vec![0.0f32; self.dimension]; let mut rng_state = hash; - for i in 0..self.dimension { + for e in embedding.iter_mut() { rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345); - embedding[i] = ((rng_state as f32 / u32::MAX as f32) - 0.5) * 2.0; + *e = ((rng_state as f32 / u32::MAX as f32) - 0.5) * 2.0; } // L2 normalize diff --git a/crates/codegraph-vector/tests/rag_tests.rs b/crates/codegraph-vector/tests/rag_tests.rs index 4ff63470..c775973c 100644 --- a/crates/codegraph-vector/tests/rag_tests.rs +++ b/crates/codegraph-vector/tests/rag_tests.rs @@ -21,7 +21,7 @@ mod rag_integration_tests { content: Some(content.to_string().into()), embedding: None, location: Location { - file_path: "test.rs".to_string().into(), + file_path: "test.rs".to_string(), line: 1, column: 1, end_line: None, diff --git a/crates/codegraph-zerocopy/src/buffer.rs b/crates/codegraph-zerocopy/src/buffer.rs index b61b0e52..2d901cc2 100644 --- a/crates/codegraph-zerocopy/src/buffer.rs +++ b/crates/codegraph-zerocopy/src/buffer.rs @@ -468,7 +468,7 @@ impl BufferManager { let mut stats = self.stats.write(); stats.total_requests += 1; - let buffer = if size <= 4096 { + if size <= 4096 { stats.small_requests += 1; self.small_pool.get() } else if size <= 65536 { @@ -480,9 +480,7 @@ impl 
BufferManager { } else { stats.oversized_requests += 1; BytesMut::with_capacity(size) - }; - - buffer + } } /// Return a buffer to the appropriate pool diff --git a/crates/codegraph-zerocopy/src/shared_memory.rs b/crates/codegraph-zerocopy/src/shared_memory.rs index 08e3c889..18a7421e 100644 --- a/crates/codegraph-zerocopy/src/shared_memory.rs +++ b/crates/codegraph-zerocopy/src/shared_memory.rs @@ -8,14 +8,12 @@ use memmap2::{MmapMut, MmapOptions}; use parking_lot::RwLock; use rkyv::api::high::HighValidator; use rkyv::{access, access_unchecked, Archive}; +use std::rc::Rc; use std::{ fs::{File, OpenOptions}, marker::PhantomData, path::Path, - sync::{ - atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, - Arc, - }, + sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, }; use tracing::{debug, instrument, trace, warn}; @@ -360,7 +358,7 @@ impl<'a> Drop for SharedMemoryWriter<'a> { /// Manager for multiple shared memory segments pub struct SharedMemoryManager { - segments: RwLock>>, + segments: RwLock>>, base_path: std::path::PathBuf, } @@ -382,7 +380,7 @@ impl SharedMemoryManager { &self, name: &str, size: usize, - ) -> ZeroCopyResult> { + ) -> ZeroCopyResult> { let segment_path = self.base_path.join(format!("{}.shm", name)); // Check if segment already exists in manager @@ -400,7 +398,7 @@ impl SharedMemoryManager { SharedMemorySegment::create(&segment_path, size)? 
}; - let segment = Arc::new(segment); + let segment = Rc::new(segment); // Store in manager { @@ -477,13 +475,13 @@ impl SharedMemoryManager { /// Cross-process lock using shared memory pub struct SharedMemoryLock { - segment: Arc, + segment: Rc, lock_offset: usize, } impl SharedMemoryLock { /// Create a new shared memory lock - pub fn new(segment: Arc, lock_offset: usize) -> Self { + pub fn new(segment: Rc, lock_offset: usize) -> Self { Self { segment, lock_offset, @@ -618,7 +616,7 @@ mod tests { let temp_dir = TempDir::new().unwrap(); let segment_path = temp_dir.path().join("lock_test.shm"); - let segment = Arc::new(SharedMemorySegment::create(&segment_path, 1024).unwrap()); + let segment = Rc::new(SharedMemorySegment::create(&segment_path, 1024).unwrap()); let lock = SharedMemoryLock::new(segment, 0); // Test lock acquisition