diff --git a/rust/src/index/summary/strategy.rs b/rust/src/index/summary/strategy.rs index 33f58e49..fb4bb7b0 100644 --- a/rust/src/index/summary/strategy.rs +++ b/rust/src/index/summary/strategy.rs @@ -6,8 +6,8 @@ use async_trait::async_trait; use crate::document::{DocumentTree, NodeId}; -use crate::llm::{LlmClient, LlmResult}; use crate::llm::memo::{MemoKey, MemoStore, MemoValue}; +use crate::llm::{LlmClient, LlmResult}; use crate::utils::fingerprint::Fingerprint; /// Configuration for summary strategies. diff --git a/rust/src/llm/executor.rs b/rust/src/llm/executor.rs index ef66c134..77447ba4 100644 --- a/rust/src/llm/executor.rs +++ b/rust/src/llm/executor.rs @@ -61,8 +61,8 @@ use async_openai::types::chat::{ use super::config::LlmConfig; use super::error::{LlmError, LlmResult}; use super::fallback::{FallbackChain, FallbackStep}; -use crate::metrics::MetricsHub; use super::throttle::ConcurrencyController; +use crate::metrics::MetricsHub; /// Unified executor for LLM operations. /// diff --git a/rust/src/llm/memo/store.rs b/rust/src/llm/memo/store.rs index b75dfb92..2fdfcea4 100644 --- a/rust/src/llm/memo/store.rs +++ b/rust/src/llm/memo/store.rs @@ -426,7 +426,8 @@ impl MemoStore { } // Restore stats - self.stats.load_from(data.stats.hits, data.stats.misses, data.stats.tokens_saved); + self.stats + .load_from(data.stats.hits, data.stats.misses, data.stats.tokens_saved); info!( "Loaded memo store with {} entries from {:?}", diff --git a/rust/src/llm/pool.rs b/rust/src/llm/pool.rs index 76f04ada..10d9041b 100644 --- a/rust/src/llm/pool.rs +++ b/rust/src/llm/pool.rs @@ -8,8 +8,8 @@ use std::sync::Arc; use super::client::LlmClient; use super::config::LlmConfig; use super::fallback::{FallbackChain, FallbackConfig}; -use crate::metrics::MetricsHub; use super::throttle::ConcurrencyController; +use crate::metrics::MetricsHub; /// Pool of LLM clients for different purposes. /// diff --git a/rust/src/retrieval/complexity/detector.rs b/rust/src/retrieval/complexity/detector.rs index 74a14918..b93f2795 100644 --- a/rust/src/retrieval/complexity/detector.rs +++ b/rust/src/retrieval/complexity/detector.rs @@ -60,8 +60,7 @@ impl ComplexityDetector { } let result = if let Some(ref client) = self.llm_client { - if let Some(complexity) = - crate::retrieval::pilot::detect_with_llm(client, query).await + if let Some(complexity) = crate::retrieval::pilot::detect_with_llm(client, query).await { complexity } else { diff --git a/rust/src/retrieval/decompose.rs b/rust/src/retrieval/decompose.rs index c596b51e..ce711f87 100644 --- a/rust/src/retrieval/decompose.rs +++ b/rust/src/retrieval/decompose.rs @@ -47,8 +47,8 @@ use serde::{Deserialize, Serialize}; use tracing::{debug, info}; -use crate::llm::{LlmClient, LlmExecutor}; use crate::llm::memo::{MemoKey, MemoOpType, MemoStore, MemoValue}; +use crate::llm::{LlmClient, LlmExecutor}; use crate::utils::fingerprint::Fingerprint; /// Sub-query resulting from decomposition. @@ -269,30 +269,27 @@ impl QueryDecomposer { info!("Decomposing complex query: '{}'", query); // Try LLM-based decomposition if available - let result = if self.config.use_llm && (self.llm_client.is_some() || self.llm_executor.is_some()) { - match self.llm_decompose(query).await { - Ok(result) => result, - Err(e) => { - debug!( - "LLM decomposition failed, falling back to rule-based: {}", - e - ); - self.rule_based_decompose(query)? + let result = + if self.config.use_llm && (self.llm_client.is_some() || self.llm_executor.is_some()) { + match self.llm_decompose(query).await { + Ok(result) => result, + Err(e) => { + debug!( + "LLM decomposition failed, falling back to rule-based: {}", + e + ); + self.rule_based_decompose(query)? + } } - } - } else { - self.rule_based_decompose(query)? - }; + } else { + self.rule_based_decompose(query)? + }; // Cache the result if let Some(ref store) = self.memo_store { let cache_key = Self::build_cache_key(query); if let Ok(json) = serde_json::to_value(&CachedDecomposition::from_result(&result)) { - store.put_with_tokens( - cache_key, - MemoValue::Json(json), - (query.len() / 4) as u64, - ); + store.put_with_tokens(cache_key, MemoValue::Json(json), (query.len() / 4) as u64); } } diff --git a/rust/src/retrieval/pilot/llm_pilot.rs b/rust/src/retrieval/pilot/llm_pilot.rs index f8df6536..289726c3 100644 --- a/rust/src/retrieval/pilot/llm_pilot.rs +++ b/rust/src/retrieval/pilot/llm_pilot.rs @@ -11,8 +11,8 @@ use std::sync::Arc; use tracing::{debug, info, warn}; use crate::document::{DocumentTree, NodeId}; -use crate::llm::{LlmClient, LlmExecutor}; use crate::llm::memo::{MemoKey, MemoStore, MemoValue}; +use crate::llm::{LlmClient, LlmExecutor}; use crate::utils::fingerprint::Fingerprint; use super::budget::BudgetController; diff --git a/rust/src/retrieval/stages/analyze.rs b/rust/src/retrieval/stages/analyze.rs index 79e2a75e..0c8bcdab 100644 --- a/rust/src/retrieval/stages/analyze.rs +++ b/rust/src/retrieval/stages/analyze.rs @@ -164,7 +164,8 @@ impl AnalyzeStage { self.complexity_detector = detector; // Also enable query decomposition - let mut decomposer = QueryDecomposer::new(DecompositionConfig::default()).with_llm_client(client); + let mut decomposer = + QueryDecomposer::new(DecompositionConfig::default()).with_llm_client(client); if let Some(ref store) = self.memo_store { decomposer = decomposer.with_memo_store(store.clone()); } diff --git a/rust/src/retrieval/sufficiency/llm_judge.rs b/rust/src/retrieval/sufficiency/llm_judge.rs index 343153a5..0bb676f3 100644 --- a/rust/src/retrieval/sufficiency/llm_judge.rs +++ b/rust/src/retrieval/sufficiency/llm_judge.rs @@ -171,11 +171,7 @@ Be conservative - only mark as sufficient if you're confident the content answer if let Some(ref store) = self.memo_store { let cache_key = self.build_cache_key(query, content); let tokens = (prompt.len() / 4) as u64; - store.put_with_tokens( - cache_key, - MemoValue::Text(format!("{:?}", result)), - tokens, - ); + store.put_with_tokens(cache_key, MemoValue::Text(format!("{:?}", result)), tokens); } result