Merged

Dev #54
8 changes: 6 additions & 2 deletions rust/src/retrieval/content/scorer.rs
@@ -161,8 +161,12 @@ impl RelevanceScorer {
     pub fn score_chunk(&self, chunk: &ContentChunk, ctx: &ScoringContext) -> ContentRelevance {
         let mut components = ScoreComponents::default();
 
-        // 1. Keyword score
-        components.keyword_score = self.compute_keyword_score(&chunk.content);
+        // 1. Keyword score (title + content combined)
+        components.keyword_score = self.compute_keyword_score(&format!(
+            "{} {}",
+            chunk.title,
+            chunk.content
+        ));
 
         // 2. BM25 score (if enabled)
         if matches!(
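For context, a minimal standalone sketch of what this change buys, assuming `compute_keyword_score` does simple term matching over whatever text it is handed (the `keyword_score` helper below is hypothetical, not the crate's implementation):

```rust
/// Hypothetical stand-in for compute_keyword_score: fraction of query terms
/// that appear in the text (case-insensitive).
fn keyword_score(query_terms: &[&str], text: &str) -> f32 {
    let text = text.to_lowercase();
    let hits = query_terms
        .iter()
        .filter(|t| text.contains(&t.to_lowercase()))
        .count();
    hits as f32 / query_terms.len().max(1) as f32
}

fn main() {
    let terms = ["budget", "controller"];
    // Before: the title was invisible to the scorer.
    assert_eq!(keyword_score(&terms, "tracks token spend per query"), 0.0);
    // After: title and content are scored together.
    let combined = format!("{} {}", "Budget Controller", "tracks token spend per query");
    assert_eq!(keyword_score(&terms, &combined), 1.0);
}
```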
139 changes: 43 additions & 96 deletions rust/src/retrieval/pilot/llm_pilot.rs
@@ -77,8 +77,12 @@ pub struct LlmPilot {
     executor: Option<Arc<LlmExecutor>>,
     /// Pilot configuration.
     config: PilotConfig,
-    /// Budget controller.
+    /// Budget controller for per-level call tracking.
     budget: BudgetController,
+    /// Shared pipeline budget: the primary budget source when set.
+    /// When available, Pilot checks this before making LLM calls and
+    /// records token consumption here.
+    pipeline_budget: parking_lot::RwLock<Option<Arc<crate::retrieval::pipeline::RetrievalBudgetController>>>,
     /// Context builder.
     context_builder: ContextBuilder,
     /// Prompt builder.
@@ -111,6 +115,7 @@ impl LlmPilot {
             executor: None,
             config,
             budget,
+            pipeline_budget: parking_lot::RwLock::new(None),
             context_builder: ContextBuilder::new(token_budget),
             prompt_builder: PromptBuilder::new(),
             response_parser: ResponseParser::new(),
@@ -131,6 +136,7 @@ impl LlmPilot {
             executor: Some(Arc::new(executor)),
             config,
             budget,
+            pipeline_budget: parking_lot::RwLock::new(None),
             context_builder: ContextBuilder::new(token_budget),
             prompt_builder: PromptBuilder::new(),
             response_parser: ResponseParser::new(),
@@ -150,6 +156,7 @@ impl LlmPilot {
             executor: Some(executor),
             config,
             budget,
+            pipeline_budget: parking_lot::RwLock::new(None),
             context_builder: ContextBuilder::new(token_budget),
             prompt_builder: PromptBuilder::new(),
             response_parser: ResponseParser::new(),
@@ -172,6 +179,7 @@ impl LlmPilot {
             executor: None,
             config,
             budget,
+            pipeline_budget: parking_lot::RwLock::new(None),
             context_builder,
             prompt_builder,
             response_parser: ResponseParser::new(),
@@ -208,6 +216,17 @@ impl LlmPilot {
         self
     }
 
+    /// Set the shared pipeline budget controller.
+    ///
+    /// When set, this becomes the primary budget gate for LLM calls.
+    /// The Pilot's own BudgetController still tracks per-level call counts,
+    /// but token consumption is recorded against the pipeline budget.
+    /// Call this at query time (not construction time) since the pipeline
+    /// budget is created per-query.
+    pub fn set_pipeline_budget(&self, budget: Arc<crate::retrieval::pipeline::RetrievalBudgetController>) {
+        *self.pipeline_budget.write() = Some(budget);
+    }
+
     /// Check if using LlmExecutor (unified throttle/retry/fallback).
     pub fn has_executor(&self) -> bool {
         self.executor.is_some()
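Since `reset()` (further down in this file) clears the shared budget at the start of every query, injection has to happen after the reset. A hypothetical per-query call site on the orchestrator side, using the `as_any` hook this PR adds to the `Pilot` trait; the free-function shape and variable names are assumptions, while `set_pipeline_budget`, `as_any`, `reset`, and `budget_controller` come from this PR:

```rust
// Hypothetical wiring at the start of a query.
fn attach_pipeline_budget(pilot: &dyn Pilot, ctx: &PipelineContext) {
    pilot.reset(); // clears any pipeline budget left over from the previous query
    if let Some(llm_pilot) = pilot.as_any().downcast_ref::<LlmPilot>() {
        llm_pilot.set_pipeline_budget(Arc::clone(&ctx.budget_controller));
    }
}
```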
@@ -259,7 +278,17 @@ impl LlmPilot {
     }
 
     /// Check if budget allows LLM calls.
+    ///
+    /// Checks the shared pipeline budget first (if set), then falls back
+    /// to the Pilot's own per-call budget.
     fn has_budget(&self) -> bool {
+        // Primary: check pipeline budget
+        if let Some(ref pb) = *self.pipeline_budget.read() {
+            if pb.status().should_stop() {
+                return false;
+            }
+        }
+        // Secondary: check Pilot's own call-level budget
         self.budget.can_call()
     }
 
@@ -335,58 +364,35 @@ impl LlmPilot {
             }
         }
 
-        println!(
-            "[DEBUG] LlmPilot::call_llm() - point={:?}, estimated_tokens={}",
-            point, prompt.estimated_tokens
-        );
-        println!(
-            "[DEBUG] LlmPilot::call_llm() - SYSTEM PROMPT:\n{}",
-            prompt.system
-        );
-        println!(
-            "[DEBUG] LlmPilot::call_llm() - USER PROMPT:\n{}",
-            prompt.user
-        );
-        println!(
-            "[DEBUG] LlmPilot::call_llm() - candidates count: {}",
-            candidates.len()
-        );
         debug!(
             "Calling LLM for {:?} point (estimated: {} tokens)",
             point, prompt.estimated_tokens
        );
 
         // Make LLM call - use executor if available, otherwise use client directly
         let result = if let Some(ref executor) = self.executor {
-            println!("[DEBUG] LlmPilot::call_llm() - using LlmExecutor");
             // Use LlmExecutor for unified throttle/retry/fallback
             executor.complete(&prompt.system, &prompt.user).await
         } else {
-            println!("[DEBUG] LlmPilot::call_llm() - using direct client");
             // Fallback to direct client call
             self.client.complete(&prompt.system, &prompt.user).await
         };
 
         match result {
             Ok(response) => {
-                println!(
-                    "[DEBUG] LlmPilot::call_llm() - RAW LLM RESPONSE:\n{}",
-                    response
-                );
                 // Record usage (estimate output tokens)
                 let output_tokens = self.estimate_tokens(&response);
+                let total_tokens = prompt.estimated_tokens + output_tokens;
                 self.budget
                     .record_usage(prompt.estimated_tokens, output_tokens, 0);
+
+                // Also record in pipeline budget if shared
+                if let Some(ref pb) = *self.pipeline_budget.read() {
+                    pb.record_tokens(total_tokens);
+                }
 
                 // Parse response
                 let mut decision = self.response_parser.parse(&response, candidates, point);
-                println!(
-                    "[DEBUG] LlmPilot::call_llm() - PARSED DECISION: confidence={:.2}, ranked={}, direction={:?}, reasoning={}",
-                    decision.confidence,
-                    decision.ranked_candidates.len(),
-                    std::mem::discriminant(&decision.direction),
-                    decision.reasoning.chars().take(100).collect::<String>()
-                );
 
                 // Apply learner adjustment if available
                 if let Some(ref adj) = adjustment {
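The output-side accounting above relies on `estimate_tokens`, which this diff does not show. A plausible sketch, assuming the common rough heuristic of about four characters per token; the real method may use an actual tokenizer:

```rust
impl LlmPilot {
    /// Assumed sketch (~4 chars per token); not the crate's actual
    /// implementation, which is outside this diff.
    fn estimate_tokens(&self, text: &str) -> usize {
        text.len().div_ceil(4)
    }
}
```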
@@ -525,13 +531,11 @@ impl Pilot for LlmPilot {
     fn should_intervene(&self, state: &SearchState<'_>) -> bool {
         // Check mode
         if !self.config.mode.uses_llm() {
-            println!("[DEBUG] LlmPilot::should_intervene() - mode doesn't use LLM");
             return false;
         }
 
         // Check budget
         if !self.has_budget() {
-            println!("[DEBUG] LlmPilot::should_intervene() - budget exhausted");
             debug!("Budget exhausted, skipping intervention");
             return false;
         }
@@ -540,11 +544,6 @@ impl Pilot for LlmPilot {
 
         // Condition 1: Fork point with enough candidates
         if state.candidates.len() > intervention.fork_threshold {
-            println!(
-                "[DEBUG] LlmPilot::should_intervene() - YES: fork point with {} candidates (threshold={})",
-                state.candidates.len(),
-                intervention.fork_threshold
-            );
             debug!(
                 "Intervening: fork point with {} candidates",
                 state.candidates.len()
@@ -554,20 +553,12 @@ impl Pilot for LlmPilot {
 
         // Condition 2: Scores are too close (algorithm uncertain)
         if self.scores_are_close(state) {
-            println!(
-                "[DEBUG] LlmPilot::should_intervene() - YES: scores are close (best={:.2})",
-                state.best_score
-            );
             debug!("Intervening: scores are close");
             return true;
         }
 
         // Condition 3: Low confidence (best score too low)
         if intervention.is_low_confidence(state.best_score) {
-            println!(
-                "[DEBUG] LlmPilot::should_intervene() - YES: low confidence (best_score={:.2}, threshold={:.2})",
-                state.best_score, intervention.low_score_threshold
-            );
             debug!(
                 "Intervening: low confidence (best_score={:.2})",
                 state.best_score
@@ -577,26 +568,15 @@ impl Pilot for LlmPilot {
 
         // Condition 4: Backtracking and guide_at_backtrack is enabled
         if state.is_backtracking && self.config.guide_at_backtrack {
-            println!("[DEBUG] LlmPilot::should_intervene() - YES: backtracking");
             debug!("Intervening: backtracking");
             return true;
         }
 
-        println!(
-            "[DEBUG] LlmPilot::should_intervene() - NO: candidates={}, best_score={:.2}",
-            state.candidates.len(),
-            state.best_score
-        );
         false
     }
 
     async fn decide(&self, state: &SearchState<'_>) -> PilotDecision {
         let point = self.get_intervention_point(state);
-        println!(
-            "[DEBUG] LlmPilot::decide() - intervention_point={:?}, candidates={}",
-            point,
-            state.candidates.len()
-        );
 
         // Build context
         let context = self.context_builder.build(state);
@@ -619,16 +599,7 @@ impl Pilot for LlmPilot {
             .collect();
 
         // Make LLM call
-        let decision = self.call_llm(point, &context, &candidate_info).await;
-
-        println!(
-            "[DEBUG] LlmPilot::decide() - result: confidence={:.2}, direction={:?}, ranked={}",
-            decision.confidence,
-            std::mem::discriminant(&decision.direction),
-            decision.ranked_candidates.len()
-        );
-
-        decision
+        self.call_llm(point, &context, &candidate_info).await
     }
 
     async fn guide_start(
@@ -637,20 +608,13 @@ impl Pilot for LlmPilot {
         query: &str,
         start_node: NodeId,
     ) -> Option<PilotDecision> {
-        println!(
-            "[DEBUG] LlmPilot::guide_start() called, query='{}', start_node={:?}",
-            query, start_node
-        );
-
         // Check if guide_at_start is enabled
         if !self.config.guide_at_start {
-            println!("[DEBUG] LlmPilot::guide_start() - guide_at_start=false, skipping");
             return None;
         }
 
         // Check budget
         if !self.has_budget() {
-            println!("[DEBUG] LlmPilot::guide_start() - budget exhausted, skipping");
             debug!("Budget exhausted, cannot guide start");
             return None;
         }
@@ -664,10 +628,6 @@ impl Pilot for LlmPilot {
             debug!("Start node has no children, no guidance needed");
             return None;
         }
-        println!(
-            "[DEBUG] LlmPilot::guide_start() - {} children candidates from start_node",
-            node_ids.len()
-        );
 
         // Build CandidateInfo with titles
         let candidates: Vec<super::parser::CandidateInfo> = node_ids
@@ -683,30 +643,12 @@ impl Pilot for LlmPilot {
             .collect();
 
         // Make LLM call
-        println!("[DEBUG] LlmPilot::guide_start() - calling LLM...");
         let decision = self
             .call_llm(InterventionPoint::Start, &context, &candidates)
             .await;
 
-        println!(
-            "[DEBUG] LlmPilot::guide_start() - LLM returned: confidence={:.2}, ranked_candidates={}, reasoning='{}'",
-            decision.confidence,
-            decision.ranked_candidates.len(),
-            decision.reasoning.chars().take(100).collect::<String>()
-        );
-
-        // Debug: show top ranked candidates
-        for (i, rc) in decision.ranked_candidates.iter().enumerate().take(3) {
-            if let Some(node) = tree.get(rc.node_id) {
-                println!(
-                    "[DEBUG] Ranked {}: node_id={:?}, score={:.3}, title='{}'",
-                    i, rc.node_id, rc.score, node.title
-                );
-            }
-        }
-
         info!(
-            "Pilot start guidance: confidence={}, candidates={}",
+            "Pilot start guidance: confidence={:.2}, candidates={}",
             decision.confidence,
             decision.ranked_candidates.len()
         );
@@ -764,8 +706,13 @@ impl Pilot for LlmPilot {
 
     fn reset(&self) {
         self.budget.reset();
+        *self.pipeline_budget.write() = None;
         debug!("LlmPilot reset for new query");
     }
+
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
 }
 
 #[cfg(test)]
8 changes: 8 additions & 0 deletions rust/src/retrieval/pilot/trait.rs
@@ -202,6 +202,14 @@ pub trait Pilot: Send + Sync {
     /// Called at the start of each new search to reset
     /// budget counters, caches, and other per-query state.
     fn reset(&self);
+
+    /// Downcast support for shared budget injection.
+    ///
+    /// The default implementation returns a dummy `Any` (a unit value),
+    /// so downcasts to a concrete pilot type fail unless overridden.
+    fn as_any(&self) -> &dyn std::any::Any {
+        // Default: no downcast support
+        &()
+    }
 }
 
 /// Extension trait for Pilot with utility methods.
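The default body keeps existing `Pilot` implementations compiling unchanged, but any pilot that wants to receive the shared budget must override the method to return `self`, as `LlmPilot` does in this PR:

```rust
// Typical override in a concrete Pilot, enabling downcast_ref::<LlmPilot>():
impl Pilot for LlmPilot {
    // ... other trait methods ...
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
}
```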
Expand Down
9 changes: 7 additions & 2 deletions rust/src/retrieval/pipeline/context.rs
@@ -205,7 +205,8 @@ pub struct PipelineContext {
     /// Optional Pilot for navigation guidance.
     pub pilot: Option<Arc<dyn Pilot>>,
     /// Adaptive token budget controller for the entire pipeline.
-    pub budget_controller: RetrievalBudgetController,
+    /// Shared via Arc so Pilot can read/check the same budget.
+    pub budget_controller: Arc<RetrievalBudgetController>,
     /// Tiered reasoning cache (L1 exact, L2 path pattern, L3 strategy score).
     pub reasoning_cache: Arc<ReasoningCache>,
 
@@ -259,6 +260,9 @@ pub struct PipelineContext {
     /// Fingerprint of candidate node IDs from previous evaluate call.
     /// Used to detect stagnant loops (same candidates → same evaluation).
     pub prev_candidate_fingerprint: Option<u64>,
+    /// Per-node content cache to avoid duplicate computation.
+    /// Populated by `aggregate_content()`, read by `build_response()`.
+    pub node_content_cache: HashMap<NodeId, String>,
 
     // ============ Final Result ============
     /// Final retrieval response.
@@ -282,7 +286,7 @@ impl PipelineContext {
     ) -> Self {
         // Build retrieval index for efficient operations
         let retrieval_index = Some(tree.build_retrieval_index());
-        let budget_controller = RetrievalBudgetController::new(options.max_tokens);
+        let budget_controller = Arc::new(RetrievalBudgetController::new(options.max_tokens));
 
         Self {
             query: query.into(),
@@ -311,6 +315,7 @@ impl PipelineContext {
             accumulated_content: String::new(),
             token_count: 0,
             prev_candidate_fingerprint: None,
+            node_content_cache: HashMap::new(),
             result: None,
             stage_results: HashMap::new(),
             metrics: RetrievalMetrics::default(),
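Per its doc comment, the new cache is populated once in `aggregate_content()` and read back in `build_response()`. A minimal sketch of that pattern; the `render_node` helper, the free-function shape, and the placeholder types are assumptions, and only the `node_content_cache` field comes from this diff:

```rust
use std::collections::HashMap;

// Placeholder standing in for the crate's NodeId.
type NodeId = u64;

// Hypothetical expensive per-node rendering; computed at most once per node.
fn render_node(id: NodeId) -> String {
    format!("content of node {id}")
}

// Populate the cache once during aggregation.
fn aggregate_content(cache: &mut HashMap<NodeId, String>, nodes: &[NodeId]) {
    for &id in nodes {
        cache.entry(id).or_insert_with(|| render_node(id));
    }
}

// Reuse the cached strings instead of re-rendering when building the response.
fn build_response(cache: &HashMap<NodeId, String>, nodes: &[NodeId]) -> String {
    nodes
        .iter()
        .filter_map(|id| cache.get(id).map(String::as_str))
        .collect::<Vec<_>>()
        .join("\n\n")
}
```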
2 changes: 1 addition & 1 deletion rust/src/retrieval/pipeline/mod.rs
@@ -40,7 +40,7 @@ mod orchestrator;
 mod outcome;
 mod stage;
 
-pub use budget::BudgetStatus;
+pub use budget::{BudgetStatus, RetrievalBudgetController};
 pub use context::{CandidateNode, PipelineContext, SearchAlgorithm, SearchConfig};
 pub use orchestrator::RetrievalOrchestrator;
 pub use outcome::StageOutcome;
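This re-export is what lets `LlmPilot` name the controller from outside the pipeline module. From the Pilot's side only two calls are needed: `status().should_stop()` as the gate and `record_tokens(n)` for accounting. A minimal sketch of that surface, assuming a simple atomic counter; the real type in `budget.rs` is richer and is not shown in this diff:

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

// Assumed skeleton: only the two methods LlmPilot relies on are sketched.
pub struct RetrievalBudgetController {
    max_tokens: usize,
    used_tokens: AtomicUsize,
}

pub struct BudgetStatus {
    pub used: usize,
    pub max: usize,
}

impl BudgetStatus {
    pub fn should_stop(&self) -> bool {
        self.used >= self.max
    }
}

impl RetrievalBudgetController {
    pub fn new(max_tokens: usize) -> Self {
        Self { max_tokens, used_tokens: AtomicUsize::new(0) }
    }

    pub fn record_tokens(&self, n: usize) {
        self.used_tokens.fetch_add(n, Ordering::Relaxed);
    }

    pub fn status(&self) -> BudgetStatus {
        BudgetStatus {
            used: self.used_tokens.load(Ordering::Relaxed),
            max: self.max_tokens,
        }
    }
}
```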