diff --git a/.github/workflows/scip-import.yml b/.github/workflows/scip-import.yml new file mode 100644 index 0000000..ff47c90 --- /dev/null +++ b/.github/workflows/scip-import.yml @@ -0,0 +1,154 @@ +name: SCIP Import + +on: + workflow_call: + inputs: + scip-tool: + description: "SCIP indexer to run" + required: true + type: string # rust | typescript | python + scip-file-path: + description: "Path to the SCIP index file" + required: false + type: string + default: ".scip/index.scip" + daemon-socket: + description: "Unix socket path for the LIP daemon" + required: false + type: string + default: "/tmp/lip-ci.sock" + confidence: + description: "Confidence score for imported symbols (1-100)" + required: false + type: number + default: 100 + + workflow_dispatch: + inputs: + scip-tool: + description: "SCIP indexer to run" + required: true + type: choice + options: + - rust + - typescript + - python + scip-file-path: + description: "Path to the SCIP index file" + required: false + type: string + default: ".scip/index.scip" + daemon-socket: + description: "Unix socket path for the LIP daemon" + required: false + type: string + default: "/tmp/lip-ci.sock" + confidence: + description: "Confidence score for imported symbols (1-100)" + required: false + type: number + default: 100 + +env: + CARGO_TERM_COLOR: always + CARGO_INCREMENTAL: 0 + +jobs: + scip-import: + name: SCIP Import (${{ inputs.scip-tool }}) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + # ── Install lip CLI ────────────────────────────────────────────────────── + - uses: actions/cache@v4 + id: lip-cache + with: + path: ~/.cargo/bin/lip + key: lip-cli-${{ runner.os }}-${{ hashFiles('.github/workflows/scip-import.yml') }} + + - name: Install lip CLI + if: steps.lip-cache.outputs.cache-hit != 'true' + run: cargo install lip-cli --locked + + # ── Start LIP daemon ───────────────────────────────────────────────────── + - name: Start LIP daemon + run: | + lip daemon --socket ${{ 
inputs.daemon-socket }} & + DAEMON_PID=$! + echo "DAEMON_PID=$DAEMON_PID" >> "$GITHUB_ENV" + # Wait for socket to appear + for i in $(seq 1 30); do + [ -S "${{ inputs.daemon-socket }}" ] && break + sleep 0.2 + done + if [ ! -S "${{ inputs.daemon-socket }}" ]; then + echo "::error::Daemon socket did not appear within 6s" + exit 1 + fi + + # ── Install and run SCIP indexer ───────────────────────────────────────── + - name: Install Rust toolchain + if: inputs.scip-tool == 'rust' + uses: dtolnay/rust-toolchain@stable + with: + components: rust-analyzer + + - name: Run scip-rust indexer + if: inputs.scip-tool == 'rust' + run: | + mkdir -p "$(dirname '${{ inputs.scip-file-path }}')" + rust-analyzer scip . + # rust-analyzer writes to index.scip in cwd; move if needed + if [ "${{ inputs.scip-file-path }}" != "index.scip" ] && [ -f index.scip ]; then + mv index.scip "${{ inputs.scip-file-path }}" + fi + + - name: Setup Node.js + if: inputs.scip-tool == 'typescript' + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Run scip-typescript indexer + if: inputs.scip-tool == 'typescript' + run: | + npm install -g @sourcegraph/scip-typescript + mkdir -p "$(dirname '${{ inputs.scip-file-path }}')" + scip-typescript index --output "${{ inputs.scip-file-path }}" + + - name: Setup Python + if: inputs.scip-tool == 'python' + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Run scip-python indexer + if: inputs.scip-tool == 'python' + run: | + pip install scip-python + mkdir -p "$(dirname '${{ inputs.scip-file-path }}')" + scip-python index --output "${{ inputs.scip-file-path }}" + + # ── Import into daemon ─────────────────────────────────────────────────── + - name: Import SCIP index into LIP daemon + run: | + lip import \ + --from-scip "${{ inputs.scip-file-path }}" \ + --push-to-daemon "${{ inputs.daemon-socket }}" \ + --confidence ${{ inputs.confidence }} + + # ── Verify import ──────────────────────────────────────────────────────── + - 
name: Verify index status + run: | + echo '[{"type":"query_index_status"}]' | \ + lip query --socket "${{ inputs.daemon-socket }}" batch + + # ── Cleanup ────────────────────────────────────────────────────────────── + - name: Stop daemon + if: always() + run: | + if [ -n "$DAEMON_PID" ]; then + kill "$DAEMON_PID" 2>/dev/null || true + wait "$DAEMON_PID" 2>/dev/null || true + fi diff --git a/CHANGELOG.md b/CHANGELOG.md index f7f10d9..1379e53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,8 +6,41 @@ All notable changes to this project are documented here. ## [Unreleased] +--- + +## [2.2.0] — 2026-04-21 + ### Added +- **`NearestItem.embedding_model`** — every nearest-neighbour hit now carries the model name that produced its stored embedding. Field is optional / `skip_serializing_if = None`; older clients see no change. Populated by `nearest_by_vector`, `nearest_symbol_by_vector`, and `outliers`. Useful for debugging mixed-model indexes and confirming which model was used for a specific result. + +- **Function-level blast radius** (`QueryBlastRadiusBatch`) — semantic enrichment now uses per-symbol embeddings when available. If `EmbeddingBatch` has been called with `lip://` URIs (function-level chunks), `semantic_items[].symbol_uri` is populated and results are at function granularity. Falls back to file-level embeddings when no symbol embeddings exist, so the upgrade is transparent. + +- **`ReindexStale`** — atomic "reindex if stale" operation. Accepts `uris` and `max_age_seconds`; re-reads from disk only the URIs that are not indexed or whose last-indexed timestamp exceeds the threshold. Returns `ReindexStaleResult { reindexed, skipped }`. Pass `max_age_seconds = 0` to force unconditional reindex. Replaces the manual `QueryFileStatus` → `ReindexFiles` race. + +- **`BatchFileStatus`** — query index status for multiple files in one round-trip. Equivalent to issuing `QueryFileStatus` inside a `Batch`, but without message-per-file overhead. Batchable. 
Returns `BatchFileStatusResult { entries: Vec }`. + +- **`QueryAbiHash`** — stable hex hash (SHA-256) over a file's exported API surface (exported symbol URIs + kinds + signatures, sorted). A change in hash means the public interface changed — safe as a downstream recompilation or re-verification trigger (Kotlin IC model). Returns `AbiHashResult { uri, hash: Option }`. Batchable. + +- **Tier 1.5 Datalog inference** — `LipDatabase::run_tier1_5_inference()` runs a fixed-point inference loop applying two rules: (1) if every direct caller of a symbol is at confidence ≥ 80 (Tier 2 / SCIP quality), raise the callee to confidence 65; (2) exported symbols with no local callers are raised by 5 points (capped at 65). Never lowers confidence; never exceeds the Tier 1.5 ceiling, leaving headroom for Tier 2. + +- **Tier 2 backoff recovery** — language server backends now recover from transient crashes with exponential backoff (2–300 s, up to 8 failures) instead of being permanently disabled for the session lifetime. `disabled_*` flags are kept for hard failures (binary not installed). A `BackoffState` struct tracks `failure_count` and `available_after` per backend. Tests: `backoff_fresh_is_available`, `backoff_fail_makes_unavailable`, `backoff_reset_clears_state`, `backoff_permanent_after_8_failures`, `backoff_not_permanent_before_8_failures`. + +- **`FileStatusEntry`** — new public struct carrying the same fields as `FileStatusResult` but suitable for use inside `BatchFileStatusResult`. + +- **`QueryBlastRadiusBatch`** — batch blast radius for all exported symbols in changed files, with optional semantic enrichment via file embeddings. Accepts `changed_file_uris` and optional `min_score` threshold. Resolves symbols server-side (filtered to Function, Method, Class, Interface, Constructor, Macro), runs structural BFS per symbol, and when `min_score` is set, augments results with cosine-similarity neighbours from the file embedding index. 
Each semantic hit carries a `source` field (`"semantic"` or `"both"`) so consumers can distinguish certainty tiers. Spec §8.1.1. +- **`QueryInvalidatedFiles`** — name-based dependency tracking query. Given a set of changed symbol URIs, returns file URIs that consumed those names externally (Kotlin-IC inspired). Enables symbol-level re-verification without full reindex. +- **`JournalEntry::UpsertFilePrecomputed`** — journal variant that persists pre-computed symbols, occurrences, and CPG edges from SCIP imports. Fixes data loss on daemon restart for SCIP-imported files. + +### Fixed + +- **SCIP proto field numbers** — `SymbolInformation.relationships` (2→4), `kind` (4→5), `display_name` (5→6) aligned with upstream SCIP. Fixes protobuf decode crash (`LengthDelimited where Varint expected`) when importing any index produced by a spec-compliant SCIP emitter. +- **SCIP proto `Relationship.is_override`** → `is_definition` to match upstream field 5 semantics. +- **SCIP import pre-computed symbol persistence** — Delta handler now routes pre-computed documents through `upsert_file_precomputed`, populating sym_cache, occ_cache, def_index, name_to_symbols, and call-edge indexes. Previously, SCIP-imported symbols were silently dropped. +- **Journal replay for SCIP imports** — pre-computed symbols now survive daemon restart via `UpsertFilePrecomputed` journal entry. +- **Merkle stale_files** — uses stored `content_hash` instead of hashing empty text for pre-computed files. Fixes infinite re-sync loop. +- **file_source_text** — falls back to disk read for precomputed `file://` URIs. Fixes stream_context, embeddings, and explain-match for SCIP-imported files. + - **`EndStreamReason::CursorOutOfRange`** and **`EndStreamReason::FileNotIndexed`** — split the previously-conflated `Error + "cursor_out_of_range"` emission into two typed reasons. 
Before, a cursor past EOF and a URI the daemon had never indexed both surfaced as `reason: error, error: "cursor_out_of_range"`; clients could not distinguish "user gave bad coordinates" from "daemon has nothing for this path." Now: - `CursorOutOfRange` — the file is indexed but the cursor line is outside its range. Error message reports the actual line count. - `FileNotIndexed` — the daemon has no record of the URI. Error message names the URI. Callers should upsert or reindex, then retry. diff --git a/Cargo.toml b/Cargo.toml index 92c6f53..1e5eeb9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ ] [workspace.package] -version = "2.1.1" +version = "2.2.0" edition = "2021" rust-version = "1.78" authors = ["Lisa Welsch "] diff --git a/bindings/rust/src/daemon/journal.rs b/bindings/rust/src/daemon/journal.rs index 3de1c29..f3c0e1c 100644 --- a/bindings/rust/src/daemon/journal.rs +++ b/bindings/rust/src/daemon/journal.rs @@ -33,7 +33,7 @@ use serde::{Deserialize, Serialize}; use tracing::warn; use crate::query_graph::LipDatabase; -use crate::schema::OwnedAnnotationEntry; +use crate::schema::{OwnedAnnotationEntry, OwnedGraphEdge, OwnedOccurrence, OwnedSymbolInfo}; /// Compact the journal when it has accumulated this many entries. /// Below this threshold the overhead of compaction isn't worth it. @@ -49,6 +49,14 @@ pub enum JournalEntry { text: String, language: String, }, + UpsertFilePrecomputed { + uri: String, + language: String, + content_hash: String, + symbols: Vec, + occurrences: Vec, + edges: Vec, + }, RemoveFile { uri: String, }, @@ -168,9 +176,25 @@ pub fn compact(path: &Path, db: &LipDatabase) -> anyhow::Result { })?; } - // One UpsertFile per tracked file. + // One UpsertFile (or UpsertFilePrecomputed) per tracked file. 
for uri in db.tracked_uris() { - if let (Some(text), Some(lang)) = (db.file_text(&uri), db.file_language(&uri)) { + let Some(lang) = db.file_language(&uri) else { + continue; + }; + if db.is_precomputed(&uri) { + let content_hash = db.file_content_hash(&uri).unwrap_or_default().to_owned(); + let symbols = db.cached_symbols(&uri).as_ref().clone(); + let occurrences = db.cached_occurrences(&uri).as_ref().clone(); + let edges = db.file_call_edges_raw(&uri); + write_entry(&JournalEntry::UpsertFilePrecomputed { + uri, + language: lang.to_owned(), + content_hash, + symbols, + occurrences, + edges, + })?; + } else if let Some(text) = db.file_text(&uri) { write_entry(&JournalEntry::UpsertFile { uri, text: text.to_owned(), @@ -208,6 +232,23 @@ pub fn replay(entries: &[JournalEntry], db: &mut LipDatabase) { } => { db.upsert_file(uri.clone(), text.clone(), language.clone()); } + JournalEntry::UpsertFilePrecomputed { + uri, + language, + content_hash, + symbols, + occurrences, + edges, + } => { + db.upsert_file_precomputed( + uri.clone(), + language.clone(), + content_hash.clone(), + symbols.clone(), + occurrences.clone(), + edges.clone(), + ); + } JournalEntry::RemoveFile { uri } => { db.remove_file(uri); } @@ -355,6 +396,85 @@ mod tests { assert_eq!(db2.current_merkle_root(), Some("abc")); } + #[test] + fn precomputed_survives_compact_replay() { + use crate::schema::{OwnedOccurrence, OwnedRange, OwnedSymbolInfo, Role, SymbolKind}; + + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path().to_owned(); + + let sym = OwnedSymbolInfo { + uri: "lip://local/lib.rs#Foo".into(), + display_name: "Foo".into(), + kind: SymbolKind::Function, + documentation: None, + signature: None, + confidence_score: 90, + relationships: vec![], + runtime_p99_ms: None, + call_rate_per_s: None, + taint_labels: vec![], + blast_radius: 0, + is_exported: false, + }; + let occ = OwnedOccurrence { + symbol_uri: "lip://local/lib.rs#Foo".into(), + range: OwnedRange { + start_line: 0, + start_char: 
0, + end_line: 0, + end_char: 3, + }, + confidence_score: 90, + role: Role::Definition, + override_doc: None, + }; + + // Write a precomputed entry. + let (mut j, _) = Journal::open(&path).unwrap(); + j.append(&JournalEntry::UpsertFilePrecomputed { + uri: "file:///project/lib.rs".into(), + language: "rust".into(), + content_hash: "abc123".into(), + symbols: vec![sym], + occurrences: vec![occ], + edges: vec![], + }) + .unwrap(); + drop(j); + + // Replay into db1. + let (_, entries) = Journal::open(&path).unwrap(); + let mut db1 = LipDatabase::new(); + replay(&entries, &mut db1); + assert_eq!(db1.file_count(), 1); + assert!(db1.is_precomputed("file:///project/lib.rs")); + let syms = db1.file_symbols("file:///project/lib.rs"); + assert_eq!(syms.len(), 1, "precomputed symbol must survive replay"); + + // Compact and replay into db2. + compact(&path, &db1).unwrap(); + let (_, compacted) = Journal::open(&path).unwrap(); + let mut db2 = LipDatabase::new(); + replay(&compacted, &mut db2); + assert_eq!(db2.file_count(), 1); + assert!(db2.is_precomputed("file:///project/lib.rs")); + let syms2 = db2.file_symbols("file:///project/lib.rs"); + assert_eq!( + syms2.len(), + 1, + "precomputed symbol must survive compact + replay" + ); + assert_eq!(syms2[0].display_name, "Foo"); + + let results = db2.workspace_symbols("Foo", 10); + assert_eq!( + results.len(), + 1, + "precomputed symbol must be searchable after compact + replay" + ); + } + #[test] fn open_append_creates_file_if_absent() { let dir = tempfile::tempdir().unwrap(); diff --git a/bindings/rust/src/daemon/session.rs b/bindings/rust/src/daemon/session.rs index 59748d2..eba70e1 100644 --- a/bindings/rust/src/daemon/session.rs +++ b/bindings/rust/src/daemon/session.rs @@ -227,9 +227,34 @@ impl Session { let lang = document.language.clone(); let source_opt = document.source_text.clone(); + let has_precomputed = document.source_text.is_none() + && (!document.symbols.is_empty() || !document.occurrences.is_empty()); + let 
content_hash = document.content_hash.clone(); + let symbols = document.symbols.clone(); + let occurrences = document.occurrences.clone(); + let edges = document.edges.clone(); + let workspace_root = { let mut db = self.db.lock().await; match action { + Action::Upsert if has_precomputed => { + self.journal_write(JournalEntry::UpsertFilePrecomputed { + uri: uri.clone(), + language: lang.clone(), + content_hash: content_hash.clone(), + symbols: symbols.clone(), + occurrences: occurrences.clone(), + edges: edges.clone(), + }); + db.upsert_file_precomputed( + uri.clone(), + lang.clone(), + content_hash, + symbols, + occurrences, + edges, + ); + } Action::Upsert => { let text = source_opt.clone().unwrap_or_default(); self.journal_write(JournalEntry::UpsertFile { @@ -265,6 +290,11 @@ impl Session { } // Enqueue Tier 2 verification for supported languages on upsert. + // Skipped for pre-computed imports (SCIP): source_opt is None so + // the (Some(tx), Some(source)) guard below won't fire. This is + // intentional — SCIP emitters are authoritative; re-verifying via + // a local language server would be redundant and may not have the + // right project context. 
if matches!(action, Action::Upsert) { let needs_tier2 = lang == "rust" || uri.ends_with(".rs") @@ -369,6 +399,15 @@ impl Session { ServerMessage::BlastRadiusResult(result) } + ClientMessage::QueryBlastRadiusBatch { + changed_file_uris, + min_score, + } => { + let mut db = self.db.lock().await; + let results = db.blast_radius_batch(&changed_file_uris, min_score); + ServerMessage::BlastRadiusBatchResult { results } + } + ClientMessage::QueryWorkspaceSymbols { query, limit } => { let limit = limit.unwrap_or(100); let mut db = self.db.lock().await; @@ -388,6 +427,14 @@ impl Session { ServerMessage::DeadSymbolsResult { symbols } } + ClientMessage::QueryInvalidatedFiles { + changed_symbol_uris, + } => { + let db = self.db.lock().await; + let file_uris = db.invalidated_files_for(&changed_symbol_uris); + ServerMessage::InvalidatedFilesResult { file_uris } + } + // ── Annotations ─────────────────────────────────────────────── ClientMessage::AnnotationSet { symbol_uri, @@ -579,7 +626,7 @@ impl Session { }; let vectors: Vec>> = cached_hits .into_iter() - .zip(texts_needed.into_iter()) + .zip(texts_needed) .map(|(cached, needed)| { if let Some(v) = cached { Some(v) @@ -1114,6 +1161,7 @@ impl Session { Some(crate::query_graph::types::NearestItem { uri: c.clone(), score, + embedding_model: None, }) }) .collect(); @@ -1319,6 +1367,7 @@ impl Session { Some(crate::query_graph::types::NearestItem { uri: store_uri.clone(), score, + embedding_model: None, }) }) .collect(); @@ -1490,7 +1539,7 @@ impl Session { let q_norm: f32 = query_vec.iter().map(|x| x * x).sum::().sqrt(); let mut scored: Vec = raw_chunks .into_iter() - .zip(chunk_vecs.into_iter()) + .zip(chunk_vecs) .filter_map(|((start_line, end_line, chunk_text), vec)| { if vec.len() != query_vec.len() || q_norm == 0.0 { return None; @@ -1561,6 +1610,73 @@ impl Session { error: None, } } + + // ── v2.2 features ───────────────────────────────────────────── + ClientMessage::ReindexStale { + uris, + max_age_seconds, + } => { + let 
mut reindexed = Vec::new(); + let mut skipped = Vec::new(); + for uri in &uris { + let is_stale = { + let db = self.db.lock().await; + let (indexed, _, age_seconds) = db.file_status(uri); + !indexed || age_seconds.map(|age| age > max_age_seconds).unwrap_or(true) + }; + if is_stale { + let Some(path) = uri_to_path(uri) else { + skipped.push(uri.clone()); + continue; + }; + let Ok(text) = std::fs::read_to_string(&path) else { + warn!("ReindexStale: could not read {}", path.display()); + skipped.push(uri.clone()); + continue; + }; + let lang = { + use crate::indexer::language::Language; + Language::detect(uri, "").as_str().to_owned() + }; + let mut db = self.db.lock().await; + db.upsert_file(uri.clone(), text, lang); + reindexed.push(uri.clone()); + } else { + skipped.push(uri.clone()); + } + } + debug!( + "ReindexStale: reindexed {}/{} files", + reindexed.len(), + uris.len() + ); + ServerMessage::ReindexStaleResult { reindexed, skipped } + } + + ClientMessage::BatchFileStatus { uris } => { + let db = self.db.lock().await; + let entries = uris + .into_iter() + .map(|uri| { + let (indexed, has_embedding, age_seconds) = db.file_status(&uri); + let embedding_model = db.file_embedding_model(&uri).map(str::to_owned); + crate::query_graph::types::FileStatusEntry { + uri, + indexed, + has_embedding, + age_seconds, + embedding_model, + } + }) + .collect(); + ServerMessage::BatchFileStatusResult { entries } + } + + ClientMessage::QueryAbiHash { uri } => { + let mut db = self.db.lock().await; + let hash = db.abi_hash(&uri); + ServerMessage::AbiHashResult { uri, hash } + } } } @@ -1848,6 +1964,14 @@ fn process_query_sync( ok(ServerMessage::BlastRadiusResult(result)) } + ClientMessage::QueryBlastRadiusBatch { + changed_file_uris, + min_score, + } => { + let results = db.blast_radius_batch(&changed_file_uris, min_score); + ok(ServerMessage::BlastRadiusBatchResult { results }) + } + ClientMessage::QueryWorkspaceSymbols { query, limit } => { let limit = limit.unwrap_or(100); let 
syms = db.workspace_symbols(&query, limit); @@ -1864,6 +1988,13 @@ fn process_query_sync( ok(ServerMessage::DeadSymbolsResult { symbols }) } + ClientMessage::QueryInvalidatedFiles { + changed_symbol_uris, + } => { + let file_uris = db.invalidated_files_for(&changed_symbol_uris); + ok(ServerMessage::InvalidatedFilesResult { file_uris }) + } + // ── Annotations ─────────────────────────────────────────────────── ClientMessage::AnnotationSet { symbol_uri, @@ -2185,6 +2316,7 @@ fn process_query_sync( Some(crate::query_graph::types::NearestItem { uri: c.clone(), score, + embedding_model: None, }) }) .collect(); @@ -2280,6 +2412,7 @@ fn process_query_sync( Some(crate::query_graph::types::NearestItem { uri: su.clone(), score, + embedding_model: None, }) }) .collect(); @@ -2327,6 +2460,34 @@ fn process_query_sync( ClientMessage::RegisterTier3Source { .. } => { err("RegisterTier3Source is a mutation; not permitted in BatchQuery") } + + // ── v2.2: new variants ─────────────────────────────────────────────── + ClientMessage::ReindexStale { .. 
} => { + err("ReindexStale requires filesystem I/O; not permitted in BatchQuery") + } + + ClientMessage::BatchFileStatus { uris } => { + let entries = uris + .into_iter() + .map(|uri| { + let (indexed, has_embedding, age_seconds) = db.file_status(&uri); + let embedding_model = db.file_embedding_model(&uri).map(str::to_owned); + crate::query_graph::types::FileStatusEntry { + uri, + indexed, + has_embedding, + age_seconds, + embedding_model, + } + }) + .collect(); + ok(ServerMessage::BatchFileStatusResult { entries }) + } + + ClientMessage::QueryAbiHash { uri } => { + let hash = db.abi_hash(&uri); + ok(ServerMessage::AbiHashResult { uri, hash }) + } } } diff --git a/bindings/rust/src/daemon/tier2_manager.rs b/bindings/rust/src/daemon/tier2_manager.rs index b021476..e2e80e7 100644 --- a/bindings/rust/src/daemon/tier2_manager.rs +++ b/bindings/rust/src/daemon/tier2_manager.rs @@ -11,6 +11,7 @@ use std::path::PathBuf; use std::sync::Arc; +use std::time::{Duration, Instant}; use tokio::sync::{broadcast, mpsc, Mutex}; use tracing::{debug, error, info, warn}; @@ -44,35 +45,79 @@ pub struct VerificationJob { // ─── Per-language backend state ─────────────────────────────────────────────── +/// Exponential backoff state for a single backend. +/// +/// On each failure `fail()` is called — it schedules the backend to be +/// unavailable for `2^failure_count` seconds, capped at 5 minutes. On +/// success `reset()` is called — failure count drops to zero. 
+#[derive(Default)] +struct BackoffState { + failure_count: u8, + available_after: Option, +} + +impl BackoffState { + fn is_available(&self) -> bool { + self.available_after + .map(|t| Instant::now() >= t) + .unwrap_or(true) + } + + fn fail(&mut self) { + self.failure_count = self.failure_count.saturating_add(1); + let secs = (1u64 << self.failure_count.min(8)).min(300); // 2s … 300s + self.available_after = Some(Instant::now() + Duration::from_secs(secs)); + } + + fn reset(&mut self) { + self.failure_count = 0; + self.available_after = None; + } + + fn is_permanent_failure(&self) -> bool { + // Treat as permanent only after 8+ consecutive failures (~5-min blackout). + self.failure_count >= 8 + } +} + /// Holds an optional instance of each language server backend. /// -/// `None` means either "not yet started" OR "permanently disabled" (spawn -/// failed). The `disabled_*` sentinels distinguish the two states so we don't -/// retry a binary that is not installed. +/// `None` means "not yet started". The `backoff_*` fields track consecutive +/// failures; a backend is retried after an exponential delay rather than +/// being permanently disabled, so a transient spawn failure or crash +/// recovers automatically. 
struct Tier2Backends { rust: Option, - rust_ws: Option, // workspace last used to init rust backend - rust_disabled: bool, + rust_ws: Option, + rust_backoff: BackoffState, + rust_disabled: bool, // binary not installed (spawn returned ENOENT / similar) typescript: Option, + typescript_backoff: BackoffState, typescript_disabled: bool, python: Option, + python_backoff: BackoffState, python_disabled: bool, dart: Option, + dart_backoff: BackoffState, dart_disabled: bool, clangd: Option, + clangd_backoff: BackoffState, clangd_disabled: bool, gopls: Option, + gopls_backoff: BackoffState, gopls_disabled: bool, kotlin: Option, + kotlin_backoff: BackoffState, kotlin_disabled: bool, swift: Option, + swift_backoff: BackoffState, swift_disabled: bool, } @@ -81,20 +126,28 @@ impl Tier2Backends { Self { rust: None, rust_ws: None, + rust_backoff: BackoffState::default(), rust_disabled: false, typescript: None, + typescript_backoff: BackoffState::default(), typescript_disabled: false, python: None, + python_backoff: BackoffState::default(), python_disabled: false, dart: None, + dart_backoff: BackoffState::default(), dart_disabled: false, clangd: None, + clangd_backoff: BackoffState::default(), clangd_disabled: false, gopls: None, + gopls_backoff: BackoffState::default(), gopls_disabled: false, kotlin: None, + kotlin_backoff: BackoffState::default(), kotlin_disabled: false, swift: None, + swift_backoff: BackoffState::default(), swift_disabled: false, } } @@ -174,6 +227,10 @@ impl Tier2Manager { if self.backends.rust_disabled { return; } + if !self.backends.rust_backoff.is_available() { + debug!("tier2: rust-analyzer in backoff, skipping {}", job.uri); + return; + } // If the workspace changed, tear down the old backend. 
if let Some(root) = &job.workspace_root { @@ -200,11 +257,17 @@ impl Tier2Manager { Ok(b) => { info!("tier2: rust-analyzer backend ready"); self.backends.rust = Some(b); + self.backends.rust_backoff.reset(); } Err(e) => { - warn!("tier2: rust-analyzer unavailable, disabling: {e}"); - self.backends.rust_disabled = true; - self.backends.rust_ws = None; + self.backends.rust_backoff.fail(); + if self.backends.rust_backoff.is_permanent_failure() { + warn!("tier2: rust-analyzer unavailable after repeated failures, disabling: {e}"); + self.backends.rust_disabled = true; + self.backends.rust_ws = None; + } else { + warn!("tier2: rust-analyzer spawn failed (will retry with backoff): {e}"); + } return; } } @@ -220,12 +283,13 @@ impl Tier2Manager { let mut db = self.db.lock().await; self.broadcast_upgrades(&result.uri, &result.symbols, &mut db); db.upgrade_file_symbols(&result.uri, &result.symbols); + self.backends.rust_backoff.reset(); debug!("tier2: upgraded {upgraded} symbols for {}", job.uri); } Err(e) => { error!("tier2: rust verification failed for {}: {e}", job.uri); - // Assume backend crashed; reset so we reinitialise on next job. 
self.backends.rust = None; + self.backends.rust_backoff.fail(); } } } @@ -233,7 +297,10 @@ impl Tier2Manager { // ── TypeScript ──────────────────────────────────────────────────────────── async fn ensure_ts_backend(&mut self) { - if self.backends.typescript.is_some() || self.backends.typescript_disabled { + if self.backends.typescript.is_some() + || self.backends.typescript_disabled + || !self.backends.typescript_backoff.is_available() + { return; } @@ -241,21 +308,27 @@ impl Tier2Manager { Ok(b) => { info!("tier2: typescript-language-server backend ready"); self.backends.typescript = Some(b); + self.backends.typescript_backoff.reset(); } Err(e) => { - warn!("tier2: typescript-language-server unavailable, disabling: {e}"); - self.backends.typescript_disabled = true; + self.backends.typescript_backoff.fail(); + if self.backends.typescript_backoff.is_permanent_failure() { + warn!("tier2: typescript-language-server unavailable, disabling: {e}"); + self.backends.typescript_disabled = true; + } else { + warn!("tier2: typescript-language-server spawn failed (will retry with backoff): {e}"); + } } } } async fn handle_typescript(&mut self, job: VerificationJob) { - if self.backends.typescript_disabled { + if self.backends.typescript_disabled || !self.backends.typescript_backoff.is_available() { return; } self.ensure_ts_backend().await; - if self.backends.typescript_disabled { + if self.backends.typescript.is_none() { return; } @@ -269,11 +342,13 @@ impl Tier2Manager { let mut db = self.db.lock().await; self.broadcast_upgrades(&result.uri, &result.symbols, &mut db); db.upgrade_file_symbols(&result.uri, &result.symbols); + self.backends.typescript_backoff.reset(); debug!("tier2: upgraded {upgraded} symbols for {}", job.uri); } Err(e) => { error!("tier2: typescript verification failed for {}: {e}", job.uri); self.backends.typescript = None; + self.backends.typescript_backoff.fail(); } } } @@ -281,7 +356,10 @@ impl Tier2Manager { // ── Python 
──────────────────────────────────────────────────────────────── async fn ensure_python_backend(&mut self) { - if self.backends.python.is_some() || self.backends.python_disabled { + if self.backends.python.is_some() + || self.backends.python_disabled + || !self.backends.python_backoff.is_available() + { return; } @@ -289,21 +367,29 @@ impl Tier2Manager { Ok(b) => { info!("tier2: python language server backend ready"); self.backends.python = Some(b); + self.backends.python_backoff.reset(); } Err(e) => { - warn!("tier2: python language server unavailable, disabling: {e}"); - self.backends.python_disabled = true; + self.backends.python_backoff.fail(); + if self.backends.python_backoff.is_permanent_failure() { + warn!("tier2: python language server unavailable, disabling: {e}"); + self.backends.python_disabled = true; + } else { + warn!( + "tier2: python language server spawn failed (will retry with backoff): {e}" + ); + } } } } async fn handle_python(&mut self, job: VerificationJob) { - if self.backends.python_disabled { + if self.backends.python_disabled || !self.backends.python_backoff.is_available() { return; } self.ensure_python_backend().await; - if self.backends.python_disabled { + if self.backends.python.is_none() { return; } @@ -317,11 +403,13 @@ impl Tier2Manager { let mut db = self.db.lock().await; self.broadcast_upgrades(&result.uri, &result.symbols, &mut db); db.upgrade_file_symbols(&result.uri, &result.symbols); + self.backends.python_backoff.reset(); debug!("tier2: upgraded {upgraded} symbols for {}", job.uri); } Err(e) => { error!("tier2: python verification failed for {}: {e}", job.uri); self.backends.python = None; + self.backends.python_backoff.fail(); } } } @@ -329,7 +417,10 @@ impl Tier2Manager { // ── Dart ────────────────────────────────────────────────────────────────── async fn ensure_dart_backend(&mut self) { - if self.backends.dart.is_some() || self.backends.dart_disabled { + if self.backends.dart.is_some() + || self.backends.dart_disabled + 
|| !self.backends.dart_backoff.is_available() + { return; } @@ -337,21 +428,29 @@ impl Tier2Manager { Ok(b) => { info!("tier2: dart language-server backend ready"); self.backends.dart = Some(b); + self.backends.dart_backoff.reset(); } Err(e) => { - warn!("tier2: dart language-server unavailable, disabling: {e}"); - self.backends.dart_disabled = true; + self.backends.dart_backoff.fail(); + if self.backends.dart_backoff.is_permanent_failure() { + warn!("tier2: dart language-server unavailable, disabling: {e}"); + self.backends.dart_disabled = true; + } else { + warn!( + "tier2: dart language-server spawn failed (will retry with backoff): {e}" + ); + } } } } async fn handle_dart(&mut self, job: VerificationJob) { - if self.backends.dart_disabled { + if self.backends.dart_disabled || !self.backends.dart_backoff.is_available() { return; } self.ensure_dart_backend().await; - if self.backends.dart_disabled { + if self.backends.dart.is_none() { return; } @@ -365,11 +464,13 @@ impl Tier2Manager { let mut db = self.db.lock().await; self.broadcast_upgrades(&result.uri, &result.symbols, &mut db); db.upgrade_file_symbols(&result.uri, &result.symbols); + self.backends.dart_backoff.reset(); debug!("tier2: upgraded {upgraded} symbols for {}", job.uri); } Err(e) => { error!("tier2: dart verification failed for {}: {e}", job.uri); self.backends.dart = None; + self.backends.dart_backoff.fail(); } } } @@ -377,7 +478,10 @@ impl Tier2Manager { // ── C / C++ ─────────────────────────────────────────────────────────────── async fn ensure_clangd_backend(&mut self, workspace_root: Option) { - if self.backends.clangd.is_some() || self.backends.clangd_disabled { + if self.backends.clangd.is_some() + || self.backends.clangd_disabled + || !self.backends.clangd_backoff.is_available() + { return; } @@ -385,21 +489,27 @@ impl Tier2Manager { Ok(b) => { info!("tier2: clangd backend ready"); self.backends.clangd = Some(b); + self.backends.clangd_backoff.reset(); } Err(e) => { - warn!("tier2: clangd 
unavailable, disabling: {e}"); - self.backends.clangd_disabled = true; + self.backends.clangd_backoff.fail(); + if self.backends.clangd_backoff.is_permanent_failure() { + warn!("tier2: clangd unavailable, disabling: {e}"); + self.backends.clangd_disabled = true; + } else { + warn!("tier2: clangd spawn failed (will retry with backoff): {e}"); + } } } } async fn handle_clangd(&mut self, job: VerificationJob) { - if self.backends.clangd_disabled { + if self.backends.clangd_disabled || !self.backends.clangd_backoff.is_available() { return; } self.ensure_clangd_backend(job.workspace_root.clone()).await; - if self.backends.clangd_disabled { + if self.backends.clangd.is_none() { return; } @@ -413,11 +523,13 @@ impl Tier2Manager { let mut db = self.db.lock().await; self.broadcast_upgrades(&result.uri, &result.symbols, &mut db); db.upgrade_file_symbols(&result.uri, &result.symbols); + self.backends.clangd_backoff.reset(); debug!("tier2: upgraded {upgraded} symbols for {}", job.uri); } Err(e) => { error!("tier2: clangd verification failed for {}: {e}", job.uri); self.backends.clangd = None; + self.backends.clangd_backoff.fail(); } } } @@ -425,7 +537,10 @@ impl Tier2Manager { // ── Go ──────────────────────────────────────────────────────────────────── async fn ensure_gopls_backend(&mut self, workspace_root: Option) { - if self.backends.gopls.is_some() || self.backends.gopls_disabled { + if self.backends.gopls.is_some() + || self.backends.gopls_disabled + || !self.backends.gopls_backoff.is_available() + { return; } @@ -433,21 +548,27 @@ impl Tier2Manager { Ok(b) => { info!("tier2: gopls backend ready"); self.backends.gopls = Some(b); + self.backends.gopls_backoff.reset(); } Err(e) => { - warn!("tier2: gopls unavailable, disabling: {e}"); - self.backends.gopls_disabled = true; + self.backends.gopls_backoff.fail(); + if self.backends.gopls_backoff.is_permanent_failure() { + warn!("tier2: gopls unavailable, disabling: {e}"); + self.backends.gopls_disabled = true; + } else { + 
warn!("tier2: gopls spawn failed (will retry with backoff): {e}"); + } } } } async fn handle_gopls(&mut self, job: VerificationJob) { - if self.backends.gopls_disabled { + if self.backends.gopls_disabled || !self.backends.gopls_backoff.is_available() { return; } self.ensure_gopls_backend(job.workspace_root.clone()).await; - if self.backends.gopls_disabled { + if self.backends.gopls.is_none() { return; } @@ -461,11 +582,13 @@ impl Tier2Manager { let mut db = self.db.lock().await; self.broadcast_upgrades(&result.uri, &result.symbols, &mut db); db.upgrade_file_symbols(&result.uri, &result.symbols); + self.backends.gopls_backoff.reset(); debug!("tier2: upgraded {upgraded} symbols for {}", job.uri); } Err(e) => { error!("tier2: gopls verification failed for {}: {e}", job.uri); self.backends.gopls = None; + self.backends.gopls_backoff.fail(); } } } @@ -473,7 +596,10 @@ impl Tier2Manager { // ── Kotlin ──────────────────────────────────────────────────────────────── async fn ensure_kotlin_backend(&mut self, workspace_root: Option) { - if self.backends.kotlin.is_some() || self.backends.kotlin_disabled { + if self.backends.kotlin.is_some() + || self.backends.kotlin_disabled + || !self.backends.kotlin_backoff.is_available() + { return; } @@ -481,21 +607,29 @@ impl Tier2Manager { Ok(b) => { info!("tier2: kotlin-language-server backend ready"); self.backends.kotlin = Some(b); + self.backends.kotlin_backoff.reset(); } Err(e) => { - warn!("tier2: kotlin-language-server unavailable, disabling: {e}"); - self.backends.kotlin_disabled = true; + self.backends.kotlin_backoff.fail(); + if self.backends.kotlin_backoff.is_permanent_failure() { + warn!("tier2: kotlin-language-server unavailable, disabling: {e}"); + self.backends.kotlin_disabled = true; + } else { + warn!( + "tier2: kotlin-language-server spawn failed (will retry with backoff): {e}" + ); + } } } } async fn handle_kotlin(&mut self, job: VerificationJob) { - if self.backends.kotlin_disabled { + if 
self.backends.kotlin_disabled || !self.backends.kotlin_backoff.is_available() { return; } self.ensure_kotlin_backend(job.workspace_root.clone()).await; - if self.backends.kotlin_disabled { + if self.backends.kotlin.is_none() { return; } @@ -509,11 +643,13 @@ impl Tier2Manager { let mut db = self.db.lock().await; self.broadcast_upgrades(&result.uri, &result.symbols, &mut db); db.upgrade_file_symbols(&result.uri, &result.symbols); + self.backends.kotlin_backoff.reset(); debug!("tier2: upgraded {upgraded} symbols for {}", job.uri); } Err(e) => { error!("tier2: kotlin verification failed for {}: {e}", job.uri); self.backends.kotlin = None; + self.backends.kotlin_backoff.fail(); } } } @@ -521,7 +657,10 @@ impl Tier2Manager { // ── Swift ───────────────────────────────────────────────────────────────── async fn ensure_swift_backend(&mut self, workspace_root: Option) { - if self.backends.swift.is_some() || self.backends.swift_disabled { + if self.backends.swift.is_some() + || self.backends.swift_disabled + || !self.backends.swift_backoff.is_available() + { return; } @@ -529,21 +668,27 @@ impl Tier2Manager { Ok(b) => { info!("tier2: sourcekit-lsp backend ready"); self.backends.swift = Some(b); + self.backends.swift_backoff.reset(); } Err(e) => { - warn!("tier2: sourcekit-lsp unavailable, disabling: {e}"); - self.backends.swift_disabled = true; + self.backends.swift_backoff.fail(); + if self.backends.swift_backoff.is_permanent_failure() { + warn!("tier2: sourcekit-lsp unavailable, disabling: {e}"); + self.backends.swift_disabled = true; + } else { + warn!("tier2: sourcekit-lsp spawn failed (will retry with backoff): {e}"); + } } } } async fn handle_swift(&mut self, job: VerificationJob) { - if self.backends.swift_disabled { + if self.backends.swift_disabled || !self.backends.swift_backoff.is_available() { return; } self.ensure_swift_backend(job.workspace_root.clone()).await; - if self.backends.swift_disabled { + if self.backends.swift.is_none() { return; } @@ -557,11 
+702,13 @@ impl Tier2Manager { let mut db = self.db.lock().await; self.broadcast_upgrades(&result.uri, &result.symbols, &mut db); db.upgrade_file_symbols(&result.uri, &result.symbols); + self.backends.swift_backoff.reset(); debug!("tier2: upgraded {upgraded} symbols for {}", job.uri); } Err(e) => { error!("tier2: swift verification failed for {}: {e}", job.uri); self.backends.swift = None; + self.backends.swift_backoff.fail(); } } } @@ -604,3 +751,606 @@ impl Tier2Manager { } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::schema::OwnedSymbolInfo; + use std::sync::Arc; + use tokio::sync::{broadcast, mpsc, Mutex}; + + // ── Helpers ────────────────────────────────────────────────────────────── + + /// Build a minimal `Tier2Manager` with all backends disabled. + /// + /// This is the primary test fixture: every backend is marked permanently + /// disabled so that `handle_*` returns immediately without attempting to + /// spawn a language server process. This lets us exercise routing, channel + /// behaviour and broadcast logic in isolation. 
+ fn manager_all_disabled() -> (Tier2Manager, mpsc::Sender) { + let (tx, rx) = mpsc::channel(CHANNEL_CAPACITY); + let (notify_tx, _) = broadcast::channel(16); + let db = Arc::new(Mutex::new(LipDatabase::new())); + + let mut backends = Tier2Backends::new(); + backends.rust_disabled = true; + backends.typescript_disabled = true; + backends.python_disabled = true; + backends.dart_disabled = true; + backends.clangd_disabled = true; + backends.gopls_disabled = true; + backends.kotlin_disabled = true; + backends.swift_disabled = true; + + let mgr = Tier2Manager { + db, + rx, + backends, + notify_tx: Some(notify_tx), + }; + (mgr, tx) + } + + fn make_job(uri: &str) -> VerificationJob { + VerificationJob { + uri: uri.to_owned(), + source: String::new(), + workspace_root: None, + version: 1, + } + } + + fn make_symbol(uri: &str, confidence: u8) -> OwnedSymbolInfo { + OwnedSymbolInfo { + uri: uri.to_owned(), + display_name: uri.rsplit('#').next().unwrap_or(uri).to_owned(), + kind: crate::schema::SymbolKind::Function, + documentation: None, + signature: None, + confidence_score: confidence, + relationships: vec![], + runtime_p99_ms: None, + call_rate_per_s: None, + taint_labels: vec![], + blast_radius: 0, + is_exported: false, + } + } + + // ── Job routing ────────────────────────────────────────────────────────── + + /// Verify that `handle` dispatches to the correct backend for every + /// supported file extension. Because all backends are disabled, each + /// handler returns immediately — the absence of a panic proves the + /// routing path was reached (the disabled-flag early-return is the first + /// line in every `handle_*` method). 
+ #[tokio::test] + async fn routing_dispatches_to_correct_backend() { + let (mut mgr, _tx) = manager_all_disabled(); + + // Rust + mgr.handle(make_job("file:///src/main.rs")).await; + + // TypeScript family + mgr.handle(make_job("file:///src/index.ts")).await; + mgr.handle(make_job("file:///src/App.tsx")).await; + mgr.handle(make_job("file:///src/util.js")).await; + mgr.handle(make_job("file:///src/App.jsx")).await; + mgr.handle(make_job("file:///src/esm.mjs")).await; + mgr.handle(make_job("file:///src/cjs.cjs")).await; + + // Python + mgr.handle(make_job("file:///src/app.py")).await; + + // Dart + mgr.handle(make_job("file:///lib/main.dart")).await; + + // C / C++ + mgr.handle(make_job("file:///src/main.c")).await; + mgr.handle(make_job("file:///src/lib.h")).await; + mgr.handle(make_job("file:///src/main.cpp")).await; + mgr.handle(make_job("file:///src/util.cc")).await; + mgr.handle(make_job("file:///src/core.cxx")).await; + mgr.handle(make_job("file:///src/api.hpp")).await; + mgr.handle(make_job("file:///src/api.hxx")).await; + + // Go + mgr.handle(make_job("file:///cmd/main.go")).await; + + // Kotlin + mgr.handle(make_job("file:///src/Main.kt")).await; + mgr.handle(make_job("file:///build.gradle.kts")).await; + + // Swift + mgr.handle(make_job("file:///Sources/App.swift")).await; + } + + /// Files with unknown extensions should be silently ignored — no panic, + /// no error, no backend touched. + #[tokio::test] + async fn routing_unknown_extension_is_noop() { + let (mut mgr, _tx) = manager_all_disabled(); + + mgr.handle(make_job("file:///README.md")).await; + mgr.handle(make_job("file:///data.json")).await; + mgr.handle(make_job("file:///Makefile")).await; + } + + // ── Channel behaviour ──────────────────────────────────────────────────── + + /// When the bounded channel is full, `try_send` must fail (Err) rather + /// than blocking the caller. 
+ #[tokio::test] + async fn full_channel_drops_jobs() { + let (tx, _rx) = mpsc::channel::(CHANNEL_CAPACITY); + + // Fill the channel to capacity. + for i in 0..CHANNEL_CAPACITY { + let job = VerificationJob { + uri: format!("file:///src/file_{i}.rs"), + source: String::new(), + workspace_root: None, + version: 1, + }; + tx.try_send(job) + .expect("channel should accept up to capacity"); + } + + // The next try_send must fail — this is the documented contract. + let overflow = VerificationJob { + uri: "file:///src/overflow.rs".to_owned(), + source: String::new(), + workspace_root: None, + version: 1, + }; + assert!( + tx.try_send(overflow).is_err(), + "try_send on a full channel must return Err, not block" + ); + } + + // ── Backend unavailability ─────────────────────────────────────────────── + + /// When a backend's `disabled` flag is set (binary not found), calling + /// `handle` with a matching file must return gracefully — no panic, no + /// spawn attempt. + #[tokio::test] + async fn disabled_backend_skips_gracefully() { + let (mut mgr, _tx) = manager_all_disabled(); + + // Explicitly verify each disabled backend short-circuits. 
+ assert!(mgr.backends.rust_disabled); + mgr.handle(make_job("file:///src/lib.rs")).await; + assert!(mgr.backends.rust.is_none(), "no backend should be created"); + + assert!(mgr.backends.typescript_disabled); + mgr.handle(make_job("file:///src/app.ts")).await; + assert!(mgr.backends.typescript.is_none()); + + assert!(mgr.backends.python_disabled); + mgr.handle(make_job("file:///src/app.py")).await; + assert!(mgr.backends.python.is_none()); + + assert!(mgr.backends.dart_disabled); + mgr.handle(make_job("file:///lib/main.dart")).await; + assert!(mgr.backends.dart.is_none()); + + assert!(mgr.backends.clangd_disabled); + mgr.handle(make_job("file:///src/main.c")).await; + assert!(mgr.backends.clangd.is_none()); + + assert!(mgr.backends.gopls_disabled); + mgr.handle(make_job("file:///cmd/main.go")).await; + assert!(mgr.backends.gopls.is_none()); + + assert!(mgr.backends.kotlin_disabled); + mgr.handle(make_job("file:///src/Main.kt")).await; + assert!(mgr.backends.kotlin.is_none()); + + assert!(mgr.backends.swift_disabled); + mgr.handle(make_job("file:///Sources/App.swift")).await; + assert!(mgr.backends.swift.is_none()); + } + + // ── Confidence elevation (broadcast) ───────────────────────────────────── + + /// When a Tier 2 upgrade raises a symbol's confidence, the manager must + /// broadcast a `SymbolUpgraded` message with the correct old/new scores. + #[tokio::test] + async fn broadcast_upgrades_fires_on_confidence_increase() { + let (notify_tx, mut notify_rx) = broadcast::channel(16); + let db = Arc::new(Mutex::new(LipDatabase::new())); + + let file_uri = "file:///src/lib.rs"; + let sym_uri = "lip://local//src/lib.rs#foo"; + + // Seed the database with a Tier 1 symbol at confidence 40. 
+ { + let mut db = db.lock().await; + db.upsert_file_precomputed( + file_uri.to_owned(), + "rust".to_owned(), + "abc123".to_owned(), + vec![make_symbol(sym_uri, 40)], + vec![], + vec![], + ); + } + + let mgr = Tier2Manager { + db: db.clone(), + rx: mpsc::channel(1).1, + backends: Tier2Backends::new(), + notify_tx: Some(notify_tx), + }; + + // Simulate a Tier 2 upgrade to confidence 90. + let upgrades = vec![make_symbol(sym_uri, 90)]; + { + let mut db = db.lock().await; + mgr.broadcast_upgrades(file_uri, &upgrades, &mut db); + } + + let msg = notify_rx.try_recv().expect("should receive a broadcast"); + match msg { + ServerMessage::SymbolUpgraded { + uri, + old_confidence, + new_confidence, + } => { + assert_eq!(uri, sym_uri); + assert_eq!(old_confidence, 40); + assert_eq!(new_confidence, 90); + } + other => panic!("expected SymbolUpgraded, got {other:?}"), + } + } + + /// No broadcast should fire when the upgrade does NOT raise confidence + /// (e.g. a stale Tier 2 result arriving after a SCIP push already set + /// the symbol to confidence 95). + #[tokio::test] + async fn broadcast_upgrades_silent_when_confidence_not_raised() { + let (notify_tx, mut notify_rx) = broadcast::channel(16); + let db = Arc::new(Mutex::new(LipDatabase::new())); + + let file_uri = "file:///src/lib.rs"; + let sym_uri = "lip://local//src/lib.rs#bar"; + + // Seed at confidence 95 (SCIP push). + { + let mut db = db.lock().await; + db.upsert_file_precomputed( + file_uri.to_owned(), + "rust".to_owned(), + "abc123".to_owned(), + vec![make_symbol(sym_uri, 95)], + vec![], + vec![], + ); + } + + let mgr = Tier2Manager { + db: db.clone(), + rx: mpsc::channel(1).1, + backends: Tier2Backends::new(), + notify_tx: Some(notify_tx), + }; + + // "Upgrade" to 90 — this is actually a downgrade, no broadcast. 
+ let upgrades = vec![make_symbol(sym_uri, 90)]; + { + let mut db = db.lock().await; + mgr.broadcast_upgrades(file_uri, &upgrades, &mut db); + } + + assert!( + notify_rx.try_recv().is_err(), + "no broadcast should fire when the upgrade does not raise confidence" + ); + } + + /// When there are no broadcast receivers, `broadcast_upgrades` must + /// short-circuit without reading from the db (the receiver_count check). + #[tokio::test] + async fn broadcast_upgrades_noop_without_receivers() { + let (notify_tx, _) = broadcast::channel::(16); + let db = Arc::new(Mutex::new(LipDatabase::new())); + + // Drop the only receiver so receiver_count == 0. + // (The `_` binding above was never subscribed to.) + drop(notify_tx.subscribe()); // subscribe then immediately drop + + let mgr = Tier2Manager { + db: db.clone(), + rx: mpsc::channel(1).1, + backends: Tier2Backends::new(), + notify_tx: Some(notify_tx), + }; + + let upgrades = vec![make_symbol("lip://local//src/lib.rs#baz", 90)]; + { + let mut db = db.lock().await; + // Should not panic even though "file:///src/lib.rs" is not in the db. + mgr.broadcast_upgrades("file:///src/lib.rs", &upgrades, &mut db); + } + } + + /// When `notify_tx` is `None`, `broadcast_upgrades` must be a no-op. + #[tokio::test] + async fn broadcast_upgrades_noop_when_notifications_disabled() { + let db = Arc::new(Mutex::new(LipDatabase::new())); + + let mgr = Tier2Manager { + db: db.clone(), + rx: mpsc::channel(1).1, + backends: Tier2Backends::new(), + notify_tx: None, + }; + + let upgrades = vec![make_symbol("lip://local//src/lib.rs#baz", 90)]; + { + let mut db = db.lock().await; + mgr.broadcast_upgrades("file:///src/lib.rs", &upgrades, &mut db); + } + // No panic = pass. + } + + // ── Symbol upgrade merging (LipDatabase::upgrade_file_symbols) ─────────── + + /// `upgrade_file_symbols` must raise confidence and merge signature, + /// documentation and relationships from Tier 2 results into existing + /// Tier 1 symbols. 
+ #[tokio::test] + async fn upgrade_merges_signature_and_confidence() { + let mut db = LipDatabase::new(); + + let file_uri = "file:///src/lib.rs"; + let sym_uri = "lip://local//src/lib.rs#process"; + + let tier1 = OwnedSymbolInfo { + uri: sym_uri.to_owned(), + display_name: "process".to_owned(), + kind: crate::schema::SymbolKind::Function, + documentation: None, + signature: None, + confidence_score: 40, + relationships: vec![], + runtime_p99_ms: None, + call_rate_per_s: None, + taint_labels: vec![], + blast_radius: 0, + is_exported: false, + }; + + db.upsert_file_precomputed( + file_uri.to_owned(), + "rust".to_owned(), + "hash1".to_owned(), + vec![tier1], + vec![], + vec![], + ); + + // Simulate Tier 2 upgrade with signature and doc. + let upgrade = OwnedSymbolInfo { + uri: sym_uri.to_owned(), + display_name: "process".to_owned(), + kind: crate::schema::SymbolKind::Function, + documentation: Some("Process the input data.".to_owned()), + signature: Some("pub fn process(input: &[u8]) -> Result<()>".to_owned()), + confidence_score: 90, + relationships: vec![crate::schema::OwnedRelationship { + target_uri: "lip://local//src/types.rs#Result".to_owned(), + is_type_definition: true, + is_reference: false, + is_implementation: false, + is_override: false, + }], + runtime_p99_ms: None, + call_rate_per_s: None, + taint_labels: vec![], + blast_radius: 0, + is_exported: true, + }; + + db.upgrade_file_symbols(file_uri, &[upgrade]); + + let symbols = db.file_symbols(file_uri); + assert_eq!(symbols.len(), 1); + let sym = &symbols[0]; + assert_eq!(sym.confidence_score, 90, "confidence must be elevated"); + assert_eq!( + sym.signature.as_deref(), + Some("pub fn process(input: &[u8]) -> Result<()>"), + "signature must be merged from Tier 2" + ); + assert_eq!( + sym.documentation.as_deref(), + Some("Process the input data."), + "documentation must be merged from Tier 2" + ); + assert_eq!(sym.relationships.len(), 1, "relationships must be merged"); + 
assert!(sym.relationships[0].is_type_definition); + } + + /// `upgrade_file_symbols` must NOT downgrade a symbol that already has a + /// higher confidence (e.g. from a SCIP push at 95). + #[tokio::test] + async fn upgrade_does_not_downgrade_confidence() { + let mut db = LipDatabase::new(); + + let file_uri = "file:///src/lib.rs"; + let sym_uri = "lip://local//src/lib.rs#hi_conf"; + + let existing = OwnedSymbolInfo { + uri: sym_uri.to_owned(), + display_name: "hi_conf".to_owned(), + kind: crate::schema::SymbolKind::Function, + documentation: Some("Already documented.".to_owned()), + signature: Some("fn hi_conf() -> u32".to_owned()), + confidence_score: 95, + relationships: vec![], + runtime_p99_ms: None, + call_rate_per_s: None, + taint_labels: vec![], + blast_radius: 0, + is_exported: false, + }; + + db.upsert_file_precomputed( + file_uri.to_owned(), + "rust".to_owned(), + "hash2".to_owned(), + vec![existing], + vec![], + vec![], + ); + + // Tier 2 arrives late with a lower confidence. + let stale = OwnedSymbolInfo { + uri: sym_uri.to_owned(), + display_name: "hi_conf".to_owned(), + kind: crate::schema::SymbolKind::Function, + documentation: None, + signature: Some("fn hi_conf() -> u32".to_owned()), + confidence_score: 70, + relationships: vec![], + runtime_p99_ms: None, + call_rate_per_s: None, + taint_labels: vec![], + blast_radius: 0, + is_exported: false, + }; + + db.upgrade_file_symbols(file_uri, &[stale]); + + let symbols = db.file_symbols(file_uri); + let sym = &symbols[0]; + assert_eq!( + sym.confidence_score, 95, + "confidence must not be downgraded" + ); + assert_eq!( + sym.documentation.as_deref(), + Some("Already documented."), + "existing documentation must be preserved" + ); + } + + /// `upgrade_file_symbols` with an empty upgrade slice is a no-op. 
+ #[tokio::test] + async fn upgrade_empty_is_noop() { + let mut db = LipDatabase::new(); + + let file_uri = "file:///src/lib.rs"; + db.upsert_file_precomputed( + file_uri.to_owned(), + "rust".to_owned(), + "hash3".to_owned(), + vec![make_symbol("lip://local//src/lib.rs#x", 40)], + vec![], + vec![], + ); + + db.upgrade_file_symbols(file_uri, &[]); + + let symbols = db.file_symbols(file_uri); + assert_eq!(symbols[0].confidence_score, 40, "nothing should change"); + } + + /// `upgrade_file_symbols` on a URI not in the database is a no-op. + #[tokio::test] + async fn upgrade_unknown_file_is_noop() { + let mut db = LipDatabase::new(); + let sym = make_symbol("lip://local//unknown.rs#foo", 90); + // Must not panic. + db.upgrade_file_symbols("file:///unknown.rs", &[sym]); + } + + // ── Tier2Backends default state ────────────────────────────────────────── + + /// Fresh `Tier2Backends` must have all backends as `None`, all + /// disabled flags as `false`, and all backoff states clear. + #[test] + fn backends_default_state() { + let b = Tier2Backends::new(); + assert!(b.rust.is_none()); + assert!(!b.rust_disabled); + assert!(b.rust_backoff.is_available()); + assert!(b.typescript.is_none()); + assert!(!b.typescript_disabled); + assert!(b.typescript_backoff.is_available()); + assert!(b.python.is_none()); + assert!(!b.python_disabled); + assert!(b.python_backoff.is_available()); + assert!(b.dart.is_none()); + assert!(!b.dart_disabled); + assert!(b.dart_backoff.is_available()); + assert!(b.clangd.is_none()); + assert!(!b.clangd_disabled); + assert!(b.clangd_backoff.is_available()); + assert!(b.gopls.is_none()); + assert!(!b.gopls_disabled); + assert!(b.gopls_backoff.is_available()); + assert!(b.kotlin.is_none()); + assert!(!b.kotlin_disabled); + assert!(b.kotlin_backoff.is_available()); + assert!(b.swift.is_none()); + assert!(!b.swift_disabled); + assert!(b.swift_backoff.is_available()); + } + + // ── Channel capacity constant ──────────────────────────────────────────── + + 
#[test] + fn channel_capacity_is_64() { + assert_eq!(CHANNEL_CAPACITY, 64); + } + + // ── BackoffState ───────────────────────────────────────────────────────── + + #[test] + fn backoff_fresh_is_available() { + let b = BackoffState::default(); + assert!(b.is_available()); + assert!(!b.is_permanent_failure()); + } + + #[test] + fn backoff_fail_makes_unavailable() { + let mut b = BackoffState::default(); + b.fail(); + assert!(!b.is_available()); + assert_eq!(b.failure_count, 1); + } + + #[test] + fn backoff_reset_clears_state() { + let mut b = BackoffState::default(); + b.fail(); + b.fail(); + b.reset(); + assert!(b.is_available()); + assert_eq!(b.failure_count, 0); + assert!(!b.is_permanent_failure()); + } + + #[test] + fn backoff_permanent_after_8_failures() { + let mut b = BackoffState::default(); + for _ in 0..8 { + b.fail(); + } + assert!(b.is_permanent_failure()); + } + + #[test] + fn backoff_not_permanent_before_8_failures() { + let mut b = BackoffState::default(); + for _ in 0..7 { + b.fail(); + } + assert!(!b.is_permanent_failure()); + } +} diff --git a/bindings/rust/src/query_graph/db.rs b/bindings/rust/src/query_graph/db.rs index 8166bd8..10e28f6 100644 --- a/bindings/rust/src/query_graph/db.rs +++ b/bindings/rust/src/query_graph/db.rs @@ -13,12 +13,13 @@ use std::sync::Arc; use crate::indexer::{language::Language, Tier1Indexer}; use crate::query_graph::types::{ - ApiSurface, BlastRadiusResult, ImpactItem, RiskLevel, SimilarSymbol, + ApiSurface, BlastRadiusResult, EnrichedBlastRadius, ImpactItem, ImpactSource, RiskLevel, + SemanticImpactItem, SimilarSymbol, }; use crate::schema::EdgeKind; use crate::schema::{ - sha256_hex, OwnedAnnotationEntry, OwnedDependencySlice, OwnedOccurrence, OwnedRange, - OwnedSymbolInfo, Role, + sha256_hex, OwnedAnnotationEntry, OwnedDependencySlice, OwnedGraphEdge, OwnedOccurrence, + OwnedRange, OwnedSymbolInfo, Role, }; // ─── Helpers ───────────────────────────────────────────────────────────────── @@ -94,6 +95,12 @@ struct 
FileInput { language: String, /// Revision at which this input was last changed. revision: u64, + /// `true` when symbols/occurrences were supplied externally (SCIP import) + /// rather than derived from `text` by Tier 1. + precomputed: bool, + /// Content hash supplied by the caller (e.g. from `OwnedDocument.content_hash`). + /// Used by `stale_files` so Merkle sync works even when `text` is empty. + content_hash: String, } #[derive(Debug)] @@ -220,6 +227,153 @@ impl LipDatabase { out } + // ── ABI surface fingerprinting ──────────────────────────────────────── + + /// Compute a stable hash over the file's exported API surface. + /// + /// The hash is SHA-256 (hex) over the newline-joined list of + /// `"URI|kind|signature"` entries for all exported symbols in `uri`, + /// sorted by URI for determinism. Returns `None` when the file is not + /// in the daemon's index. + /// + /// A change in hash means the public interface changed — safe as a + /// downstream recompilation / re-verification trigger (Kotlin IC model). + pub fn abi_hash(&mut self, uri: &str) -> Option { + if !self.file_inputs.contains_key(uri) { + return None; + } + let syms = self.file_symbols(uri); + let mut surface: Vec = syms + .iter() + .filter(|s| s.is_exported) + .map(|s| { + format!( + "{}|{}|{}", + s.uri, + s.kind as u8, + s.signature.as_deref().unwrap_or("") + ) + }) + .collect(); + surface.sort(); + let payload = surface.join("\n"); + Some(sha256_hex(payload.as_bytes())) + } + + // ── Datalog Tier 1.5 inference ──────────────────────────────────────── + + /// Run a single fixed-point inference pass and return the number of + /// symbols whose confidence was raised. + /// + /// Rules applied (one iteration; caller loops to fixpoint): + /// + /// **Rule 1 — Callee elevation**: if every direct caller of a symbol + /// has confidence ≥ 80 (Tier 2 / SCIP quality), and the symbol itself + /// is below 65, raise it to 65 (Tier 1.5 level). 
The intuition: if + /// all callers have been verified to compiler accuracy, the callee is + /// unlikely to have been left dangling; the call site itself acts as + /// implicit type evidence. + /// + /// **Rule 2 — Exported leaf stability**: an exported symbol with no + /// callers in the local graph is a stable leaf if its confidence is + /// ≥ 40. Raise it by 5 points (capped at 65) — exported with no + /// internal callers means it is part of the public API, which is + /// typically more carefully maintained than internal helpers. + /// + /// Both rules are conservative: they never lower confidence and never + /// exceed the Tier 1.5 ceiling (65), leaving room for Tier 2 / SCIP + /// to raise further. + fn inference_step(&mut self) -> usize { + const TIER2_THRESHOLD: u8 = 80; + const TIER1_5_CEILING: u8 = 65; + + // Snapshot caller confidence per symbol before mutating. + // Build: callee_uri → vec of caller confidence scores. + let mut callee_caller_confs: HashMap> = HashMap::new(); + let all_file_uris: Vec = self.file_inputs.keys().cloned().collect(); + for file_uri in &all_file_uris { + let syms = self.file_symbols(file_uri).to_vec(); + for sym in &syms { + // For each callee edge, record this caller's confidence. + if let Some(callers) = self.callee_to_callers.get(&sym.uri).cloned() { + for caller_uri in callers { + // Look up confidence of the caller symbol. + if let Some((caller_file, _)) = self.def_index.get(&caller_uri).cloned() { + let caller_syms = self.file_symbols(&caller_file.clone()).to_vec(); + if let Some(caller_sym) = + caller_syms.iter().find(|s| s.uri == caller_uri) + { + callee_caller_confs + .entry(sym.uri.clone()) + .or_default() + .push(caller_sym.confidence_score); + } + } + } + } + } + } + + // Apply rules and collect upgrades. 
+ let mut upgrades: Vec<(String, String, u8)> = Vec::new(); // (file_uri, sym_uri, new_conf) + for file_uri in &all_file_uris { + let syms = self.file_symbols(file_uri).to_vec(); + for sym in &syms { + if sym.confidence_score >= TIER1_5_CEILING { + continue; + } + let caller_confs = callee_caller_confs.get(&sym.uri); + let new_conf = if let Some(confs) = caller_confs { + if !confs.is_empty() + && confs.iter().all(|&c| c >= TIER2_THRESHOLD) + && sym.confidence_score < TIER1_5_CEILING + { + // Rule 1: all callers are Tier 2+. + Some(TIER1_5_CEILING) + } else { + None + } + } else if sym.is_exported && sym.confidence_score >= 40 { + // Rule 2: exported leaf, no local callers. + Some((sym.confidence_score + 5).min(TIER1_5_CEILING)) + } else { + None + }; + if let Some(conf) = new_conf { + if conf > sym.confidence_score { + upgrades.push((file_uri.clone(), sym.uri.clone(), conf)); + } + } + } + } + + let updated = upgrades.len(); + for (file_uri, sym_uri, new_conf) in upgrades { + let syms = self.file_symbols(&file_uri).to_vec(); + if let Some(sym) = syms.iter().find(|s| s.uri == sym_uri) { + let mut upgraded = sym.clone(); + upgraded.confidence_score = new_conf; + self.upgrade_file_symbols(&file_uri, &[upgraded]); + } + } + updated + } + + /// Run the Tier 1.5 Datalog inference loop to fixpoint. + /// + /// Returns the total number of symbol confidence scores raised. + pub fn run_tier1_5_inference(&mut self) -> usize { + let mut total = 0; + loop { + let changed = self.inference_step(); + total += changed; + if changed == 0 { + break; + } + } + total + } + // ── Mutations ───────────────────────────────────────────────────────── /// Register or update a file. 
Bumps the global revision and invalidates @@ -227,12 +381,15 @@ impl LipDatabase { pub fn upsert_file(&mut self, uri: String, text: String, language: String) { self.revision += 1; let rev = self.revision; + let content_hash = sha256_hex(text.as_bytes()); self.file_inputs.insert( uri.clone(), FileInput { text, language, revision: rev, + precomputed: false, + content_hash, }, ); // Invalidate the direct derived caches. api_cache is intentionally kept @@ -339,6 +496,120 @@ impl LipDatabase { self.file_embeddings.remove(&uri); } + /// Upsert a file whose symbols and occurrences are already computed + /// (e.g. SCIP import). Populates the same indexes as `upsert_file` but + /// skips the Tier 1 parser since the caller already provides the data. + pub fn upsert_file_precomputed( + &mut self, + uri: String, + language: String, + content_hash: String, + symbols: Vec, + occurrences: Vec, + edges: Vec, + ) { + self.revision += 1; + let rev = self.revision; + self.file_inputs.insert( + uri.clone(), + FileInput { + text: String::new(), + language, + revision: rev, + precomputed: true, + content_hash, + }, + ); + + // Clear stale caches + def_index entries for this file. + self.sym_cache.remove(&uri); + self.occ_cache.remove(&uri); + let stale_defs: Vec = self + .def_index + .iter() + .filter(|(_, (furi, _))| furi == &uri) + .map(|(sym_uri, _)| sym_uri.clone()) + .collect(); + for sym_uri in &stale_defs { + let name = extract_name(sym_uri); + if let Some(uris) = self.name_to_symbols.get_mut(name) { + uris.retain(|u| u != sym_uri); + if uris.is_empty() { + self.name_to_symbols.remove(name); + } + } + } + self.def_index.retain(|_, (furi, _)| furi != &uri); + + // Build def_index + name_to_symbols from pre-computed occurrences. 
+ let occs = Arc::new(occurrences); + for occ in occs.iter() { + if occ.role == Role::Definition { + self.def_index + .insert(occ.symbol_uri.clone(), (uri.clone(), occ.range.clone())); + let name = extract_name(&occ.symbol_uri).to_owned(); + if !name.is_empty() { + self.name_to_symbols + .entry(name) + .or_default() + .push(occ.symbol_uri.clone()); + } + } + } + self.occ_cache + .insert(uri.clone(), Cached::new(occs.clone(), rev)); + + // Seed sym_cache so file_symbols() returns the pre-computed symbols. + let syms = Arc::new(symbols); + self.sym_cache.insert(uri.clone(), Cached::new(syms, rev)); + + // Consumed-names index (same as upsert_file). + { + let mut consumed: HashSet<String> = HashSet::new(); + for occ in occs.iter().filter(|o| o.role == Role::Reference) { + let name = extract_name(&occ.symbol_uri); + if name.is_empty() { + continue; + } + let is_external = self + .def_index + .get(&occ.symbol_uri) + .map(|(def_file, _)| def_file != &uri) + .unwrap_or(true); + if is_external { + consumed.insert(name.to_owned()); + } + } + self.file_consumed_names.insert(uri.clone(), consumed); + } + + // Call-edge indexes from pre-computed edges. 
+ self.remove_file_call_edges(&uri); + let mut pairs: Vec<(String, String)> = Vec::new(); + for edge in edges.iter().filter(|e| e.kind == EdgeKind::Calls) { + self.callee_to_callers + .entry(edge.to_uri.clone()) + .or_default() + .push(edge.from_uri.clone()); + let callee_name = extract_name(&edge.to_uri).to_owned(); + if !callee_name.is_empty() { + self.callee_name_to_callers + .entry(callee_name) + .or_default() + .push(edge.from_uri.clone()); + } + pairs.push((edge.from_uri.clone(), edge.to_uri.clone())); + } + self.file_call_edges.insert(uri.clone(), pairs); + + let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as i64) + .unwrap_or(0); + self.file_indexed_at.insert(uri.clone(), now_ms); + self.file_embeddings.remove(&uri); + } + pub fn remove_file(&mut self, uri: &str) { self.revision += 1; self.file_inputs.remove(uri); @@ -402,7 +673,16 @@ impl LipDatabase { /// Returns the source text stored for `uri`, or `None` if not indexed. pub fn file_source_text(&self, uri: &str) -> Option { - self.file_inputs.get(uri).map(|f| f.text.clone()) + let fi = self.file_inputs.get(uri)?; + if fi.precomputed && fi.text.is_empty() { + if let Some(path) = uri.strip_prefix("file://") { + if let Ok(text) = std::fs::read_to_string(path) { + return Some(text); + } + } + return None; + } + Some(fi.text.clone()) } pub fn set_workspace_root(&mut self, root: PathBuf) { @@ -475,6 +755,48 @@ impl LipDatabase { self.file_inputs.keys().cloned().collect() } + pub fn is_precomputed(&self, uri: &str) -> bool { + self.file_inputs.get(uri).is_some_and(|f| f.precomputed) + } + + pub fn file_content_hash(&self, uri: &str) -> Option<&str> { + self.file_inputs.get(uri).map(|f| f.content_hash.as_str()) + } + + /// Read-only access to cached symbols (for journal compaction). 
+ pub fn cached_symbols(&self, uri: &str) -> Arc<Vec<OwnedSymbolInfo>> { + self.sym_cache + .get(uri) + .map(|c| c.value.clone()) + .unwrap_or_default() + } + + /// Read-only access to cached occurrences (for journal compaction). + pub fn cached_occurrences(&self, uri: &str) -> Arc<Vec<OwnedOccurrence>> { + self.occ_cache + .get(uri) + .map(|c| c.value.clone()) + .unwrap_or_default() + } + + /// Return stored call-edge pairs for a file (for journal compaction). + pub fn file_call_edges_raw(&self, uri: &str) -> Vec<OwnedGraphEdge> { + self.file_call_edges + .get(uri) + .map(|pairs| { + pairs + .iter() + .map(|(from, to)| OwnedGraphEdge { + from_uri: from.clone(), + to_uri: to.clone(), + kind: EdgeKind::Calls, + at_range: OwnedRange::default(), + }) + .collect() + }) + .unwrap_or_default() + } + /// Merkle sync probe: given a slice of `(uri, client_content_hash)` pairs, /// returns URIs that are stale (daemon hash ≠ client hash) or unknown to /// the daemon (never indexed). The client should re-Delta each returned URI. @@ -484,7 +806,7 @@ impl LipDatabase { .filter(|(uri, client_hash)| { match self.file_inputs.get(uri) { None => true, // daemon has never seen this file - Some(fi) => sha256_hex(fi.text.as_bytes()) != *client_hash, + Some(fi) => fi.content_hash != *client_hash, } }) .map(|(uri, _)| uri.clone()) @@ -847,6 +1169,116 @@ impl LipDatabase { } } + + /// Batch blast-radius for all symbols defined in the given files, + /// optionally enriched with embedding-based semantic coupling. + /// + /// When `min_score` is `Some(threshold)`, each changed file's embedding + /// is compared against the index and neighbours above the threshold are + /// returned as `semantic_items`. Omit to get static-only results. + pub fn blast_radius_batch( + &mut self, + changed_file_uris: &[String], + min_score: Option<f32>, + ) -> Vec<EnrichedBlastRadius> { + let mut results = Vec::new(); + let mut seen_symbols: HashSet<String> = HashSet::new(); + let threshold = min_score.unwrap_or(0.6); + + // Only resolve symbols whose kind produces meaningful blast-radius + // results. 
Variables, constants, parameters, and type aliases are + // excluded — they're dominated by framework wiring noise. + use crate::schema::SymbolKind; + let interesting = |k: SymbolKind| { + matches!( + k, + SymbolKind::Function + | SymbolKind::Method + | SymbolKind::Class + | SymbolKind::Interface + | SymbolKind::Constructor + | SymbolKind::Macro + ) + }; + + for file_uri in changed_file_uris { + let syms = self.file_symbols(file_uri); + for sym in syms.iter() { + if !interesting(sym.kind) { + continue; + } + if !seen_symbols.insert(sym.uri.clone()) { + continue; + } + let static_result = self.blast_radius_for(&sym.uri); + + let mut semantic_items = Vec::new(); + if min_score.is_some() { + let static_files: HashSet = + static_result.affected_files.iter().cloned().collect(); + + // Prefer per-symbol embeddings (function-level granularity) when + // available. Fall back to file-level embeddings when the symbol has + // no stored vector. This degrades gracefully for callers that have + // not yet run `EmbeddingBatch` with `lip://` URIs. + if let Some(sym_embedding) = self.symbol_embeddings.get(&sym.uri).cloned() { + let sym_neighbours = + self.nearest_symbol_by_vector(&sym_embedding, 20, Some(&sym.uri), None); + for n in sym_neighbours { + if n.score < threshold { + continue; + } + // Map symbol hit back to its defining file. 
+ let hit_file = self + .def_index + .get(&n.uri) + .map(|(f, _)| f.clone()) + .unwrap_or_else(|| n.uri.clone()); + let source = if static_files.contains(&hit_file) { + ImpactSource::Both + } else { + ImpactSource::Semantic + }; + semantic_items.push(SemanticImpactItem { + file_uri: hit_file, + symbol_uri: n.uri, + similarity: n.score, + source, + }); + } + } else if let Some(file_embedding) = self.file_embeddings.get(file_uri).cloned() + { + let neighbours = self.nearest_by_vector( + &file_embedding, + 20, + Some(file_uri), + None, + Some(threshold), + ); + for neighbour in neighbours { + let source = if static_files.contains(&neighbour.uri) { + ImpactSource::Both + } else { + ImpactSource::Semantic + }; + semantic_items.push(SemanticImpactItem { + file_uri: neighbour.uri, + symbol_uri: String::new(), + similarity: neighbour.score, + source, + }); + } + } + } + + results.push(EnrichedBlastRadius { + static_result, + semantic_items, + }); + } + } + results + } + /// Find the symbol URI whose occurrence range contains `(line, col)` in `uri`. /// /// Returns `None` if no occurrence covers the given position. @@ -877,6 +1309,26 @@ impl LipDatabase { .collect() } + /// Given a set of changed symbol URIs, return the deduplicated set of file + /// URIs that need re-verification because they consume at least one of the + /// changed names. + /// + /// This is the public entry-point for symbol-level invalidation (Kotlin IC + /// model). It extracts the display name from each symbol URI via + /// `extract_name`, then delegates to the `file_consumed_names` index. 
+ pub fn invalidated_files_for(&self, changed_symbol_uris: &[String]) -> Vec { + let names: HashSet<&str> = changed_symbol_uris + .iter() + .map(|uri| extract_name(uri)) + .filter(|n| !n.is_empty()) + .collect(); + if names.is_empty() { + return vec![]; + } + let name_refs: Vec<&str> = names.into_iter().collect(); + self.files_consuming_names(&name_refs) + } + // ── Embedding / observability ───────────────────────────────────────── /// Store a pre-computed embedding vector for a file, recording which model produced it. @@ -997,7 +1449,14 @@ impl LipDatabase { scored .into_iter() .take(top_k) - .map(|(uri, score)| crate::query_graph::types::NearestItem { uri, score }) + .map(|(uri, score)| { + let embedding_model = self.symbol_embedding_models.get(&uri).cloned(); + crate::query_graph::types::NearestItem { + uri, + score, + embedding_model, + } + }) .collect() } @@ -1068,7 +1527,14 @@ impl LipDatabase { scored .into_iter() .take(top_k) - .map(|(uri, score)| crate::query_graph::types::NearestItem { uri, score }) + .map(|(uri, score)| { + let embedding_model = self.file_embedding_models.get(&uri).cloned(); + crate::query_graph::types::NearestItem { + uri, + score, + embedding_model, + } + }) .collect() } @@ -1174,9 +1640,16 @@ impl LipDatabase { return vec![]; } if pairs.len() == 1 { + let uri = pairs[0].0.to_owned(); + let embedding_model = if uri.starts_with("lip://") { + self.symbol_embedding_models.get(&uri).cloned() + } else { + self.file_embedding_models.get(&uri).cloned() + }; return vec![NearestItem { - uri: pairs[0].0.to_owned(), + uri, score: 0.0, + embedding_model, }]; } @@ -1214,7 +1687,18 @@ impl LipDatabase { scores .into_iter() .take(top_k) - .map(|(uri, score)| NearestItem { uri, score }) + .map(|(uri, score)| { + let embedding_model = if uri.starts_with("lip://") { + self.symbol_embedding_models.get(&uri).cloned() + } else { + self.file_embedding_models.get(&uri).cloned() + }; + NearestItem { + uri, + score, + embedding_model, + } + }) .collect() } @@ 
-3376,4 +3860,280 @@ impl Greeter { let (total, _, _) = db.coverage("/project/src"); assert_eq!(total, 1, "should only count files under /project/src"); } + + // ── Precomputed upsert (SCIP import path) ──────────────────────────── + + #[test] + fn precomputed_symbols_appear_in_search() { + let mut db = LipDatabase::new(); + let uri = "file:///project/lib.rs".to_owned(); + let sym_uri = "lip://local/lib.rs#MyStruct".to_owned(); + let symbols = vec![OwnedSymbolInfo { + uri: sym_uri.clone(), + display_name: "MyStruct".into(), + kind: SymbolKind::Class, + documentation: None, + signature: None, + confidence_score: 90, + relationships: vec![], + runtime_p99_ms: None, + call_rate_per_s: None, + taint_labels: vec![], + blast_radius: 0, + is_exported: false, + }]; + let occurrences = vec![OwnedOccurrence { + symbol_uri: sym_uri.clone(), + range: OwnedRange { + start_line: 0, + start_char: 0, + end_line: 0, + end_char: 8, + }, + confidence_score: 90, + role: Role::Definition, + override_doc: None, + }]; + + db.upsert_file_precomputed( + uri.clone(), + "rust".into(), + "hash123".into(), + symbols, + occurrences, + vec![], + ); + + let syms = db.file_symbols(&uri); + assert_eq!(syms.len(), 1); + assert_eq!(syms[0].display_name, "MyStruct"); + + let results = db.workspace_symbols("MyStruct", 10); + assert_eq!( + results.len(), + 1, + "pre-computed symbol must appear in workspace search" + ); + + assert!( + db.symbol_definition_location(&sym_uri).is_some(), + "pre-computed definition must be resolvable" + ); + } + + #[test] + fn precomputed_upsert_is_idempotent() { + let mut db = LipDatabase::new(); + let uri = "file:///project/lib.rs".to_owned(); + let sym = OwnedSymbolInfo { + uri: "lip://local/lib.rs#Foo".into(), + display_name: "Foo".into(), + kind: SymbolKind::Function, + documentation: None, + signature: None, + confidence_score: 90, + relationships: vec![], + runtime_p99_ms: None, + call_rate_per_s: None, + taint_labels: vec![], + blast_radius: 0, + is_exported: false, 
+ }; + let occ = OwnedOccurrence { + symbol_uri: "lip://local/lib.rs#Foo".into(), + range: OwnedRange { + start_line: 0, + start_char: 0, + end_line: 0, + end_char: 3, + }, + confidence_score: 90, + role: Role::Definition, + override_doc: None, + }; + + db.upsert_file_precomputed( + uri.clone(), + "rust".into(), + "hash1".into(), + vec![sym.clone()], + vec![occ.clone()], + vec![], + ); + db.upsert_file_precomputed( + uri.clone(), + "rust".into(), + "hash1".into(), + vec![sym], + vec![occ], + vec![], + ); + + let results = db.workspace_symbols("Foo", 10); + assert_eq!(results.len(), 1, "re-upsert must not duplicate symbols"); + } + + // ── Symbol-level invalidation ──────────────────────────────────────── + + #[test] + fn invalidated_files_for_returns_consumers() { + // File A defines `fn foo()`, File B references `foo`. + // Changing `foo` must invalidate B. + let mut db = LipDatabase::new(); + + // File A: defines foo + let uri_a = "lip://local/a.rs".to_owned(); + let sym_foo = OwnedSymbolInfo::new("lip://local/a.rs#foo", "foo"); + let occ_def = OwnedOccurrence { + symbol_uri: "lip://local/a.rs#foo".into(), + range: OwnedRange { + start_line: 0, + start_char: 0, + end_line: 0, + end_char: 3, + }, + confidence_score: 90, + role: Role::Definition, + override_doc: None, + }; + db.upsert_file_precomputed( + uri_a.clone(), + "rust".into(), + "h1".into(), + vec![sym_foo], + vec![occ_def], + vec![], + ); + + // File B: references foo (defined in A → external) + let uri_b = "lip://local/b.rs".to_owned(); + let occ_ref = OwnedOccurrence { + symbol_uri: "lip://local/a.rs#foo".into(), + range: OwnedRange { + start_line: 0, + start_char: 0, + end_line: 0, + end_char: 3, + }, + confidence_score: 80, + role: Role::Reference, + override_doc: None, + }; + db.upsert_file_precomputed( + uri_b.clone(), + "rust".into(), + "h2".into(), + vec![], + vec![occ_ref], + vec![], + ); + + let invalidated = db.invalidated_files_for(&["lip://local/a.rs#foo".into()]); + assert_eq!(invalidated, 
vec![uri_b]); + } + + #[test] + fn invalidated_files_for_unreferenced_symbol() { + // File C defines `fn bar()`, no one references it. + // Changing `bar` invalidates nothing. + let mut db = LipDatabase::new(); + + let uri_c = "lip://local/c.rs".to_owned(); + let sym_bar = OwnedSymbolInfo::new("lip://local/c.rs#bar", "bar"); + let occ_def = OwnedOccurrence { + symbol_uri: "lip://local/c.rs#bar".into(), + range: OwnedRange { + start_line: 0, + start_char: 0, + end_line: 0, + end_char: 3, + }, + confidence_score: 90, + role: Role::Definition, + override_doc: None, + }; + db.upsert_file_precomputed( + uri_c.clone(), + "rust".into(), + "h1".into(), + vec![sym_bar], + vec![occ_def], + vec![], + ); + + let invalidated = db.invalidated_files_for(&["lip://local/c.rs#bar".into()]); + assert!( + invalidated.is_empty(), + "unreferenced symbol should invalidate nothing" + ); + } + + #[test] + fn remove_file_clears_consumed_names() { + // After removing a file, its consumed-names entries must be gone, + // so it no longer appears in invalidation results. 
+ let mut db = LipDatabase::new(); + + // File A: defines foo + let uri_a = "lip://local/a.rs".to_owned(); + let sym_foo = OwnedSymbolInfo::new("lip://local/a.rs#foo", "foo"); + let occ_def = OwnedOccurrence { + symbol_uri: "lip://local/a.rs#foo".into(), + range: OwnedRange { + start_line: 0, + start_char: 0, + end_line: 0, + end_char: 3, + }, + confidence_score: 90, + role: Role::Definition, + override_doc: None, + }; + db.upsert_file_precomputed( + uri_a.clone(), + "rust".into(), + "h1".into(), + vec![sym_foo], + vec![occ_def], + vec![], + ); + + // File B: references foo + let uri_b = "lip://local/b.rs".to_owned(); + let occ_ref = OwnedOccurrence { + symbol_uri: "lip://local/a.rs#foo".into(), + range: OwnedRange { + start_line: 0, + start_char: 0, + end_line: 0, + end_char: 3, + }, + confidence_score: 80, + role: Role::Reference, + override_doc: None, + }; + db.upsert_file_precomputed( + uri_b.clone(), + "rust".into(), + "h2".into(), + vec![], + vec![occ_ref], + vec![], + ); + + // Sanity: B is invalidated before removal + assert_eq!( + db.invalidated_files_for(&["lip://local/a.rs#foo".into()]), + vec![uri_b.clone()], + ); + + // Remove B — its consumed-names entry should be cleaned up + db.remove_file(&uri_b); + + let invalidated = db.invalidated_files_for(&["lip://local/a.rs#foo".into()]); + assert!( + invalidated.is_empty(), + "removed file must not appear in invalidation results" + ); + } } diff --git a/bindings/rust/src/query_graph/types.rs b/bindings/rust/src/query_graph/types.rs index d0447f4..a385fa5 100644 --- a/bindings/rust/src/query_graph/types.rs +++ b/bindings/rust/src/query_graph/types.rs @@ -63,6 +63,40 @@ impl ImpactItem { } } +/// How an impact item was discovered. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ImpactSource { + /// Discovered via static call graph / dependency analysis. + Static, + /// Discovered via embedding similarity (semantic coupling). 
+ Semantic, + /// Confirmed by both static analysis and semantic similarity. + Both, +} + +/// A single entry in a batch blast-radius result. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EnrichedBlastRadius { + /// The static blast-radius result. + #[serde(flatten)] + pub static_result: BlastRadiusResult, + /// Semantically coupled files/symbols not in the static call graph. + /// Empty when `min_score` was not supplied or embeddings are unavailable. + pub semantic_items: Vec<SemanticImpactItem>, +} + +/// An impact item discovered through embedding similarity rather than +/// static call-graph edges. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SemanticImpactItem { + pub file_uri: String, + pub symbol_uri: String, + /// Cosine similarity in [0.0, 1.0]. + pub similarity: f32, + pub source: ImpactSource, +} + /// A single nearest-neighbor hit returned by `ServerMessage::NearestResult`. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct NearestItem { @@ -70,6 +104,20 @@ pub struct NearestItem { pub uri: String, /// Cosine similarity in [0.0, 1.0] — higher is more similar. pub score: f32, + /// Model that produced the stored embedding for this item. + /// `None` when the item has no embedding or the model is unknown. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub embedding_model: Option<String>, +} + +/// Per-file entry inside [`ServerMessage::BatchFileStatusResult`]. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileStatusEntry { + pub uri: String, + pub indexed: bool, + pub has_embedding: bool, + pub age_seconds: Option<u64>, + pub embedding_model: Option<String>, +} /// A line-range chunk boundary returned by [`ServerMessage::BoundariesResult`]. 
@@ -194,6 +242,9 @@ pub enum ServerMessage { symbol: Option, }, BlastRadiusResult(BlastRadiusResult), + BlastRadiusBatchResult { + results: Vec, + }, WorkspaceSymbolsResult { symbols: Vec, }, @@ -203,6 +254,13 @@ pub enum ServerMessage { DeadSymbolsResult { symbols: Vec, }, + /// Response to [`ClientMessage::QueryInvalidatedFiles`]. + /// + /// Contains the deduplicated set of file URIs that consume at least one of + /// the changed symbol names and therefore need re-verification. + InvalidatedFilesResult { + file_uris: Vec, + }, AnnotationAck, AnnotationValue { value: Option, @@ -516,6 +574,28 @@ pub enum ServerMessage { #[serde(skip_serializing_if = "Option::is_none")] error: Option, }, + + // ── v2.2 features ──────────────────────────────────────────────────── + /// Response to [`ClientMessage::ReindexStale`]. + ReindexStaleResult { + /// URIs that were re-indexed from disk. + reindexed: Vec, + /// URIs that were within the age threshold and were skipped. + skipped: Vec, + }, + /// Response to [`ClientMessage::BatchFileStatus`]. + BatchFileStatusResult { + entries: Vec, + }, + /// Response to [`ClientMessage::QueryAbiHash`]. + /// + /// The hash is a hex-encoded SHA-256 over the file's exported symbols + /// sorted by URI. A change in hash means the public interface changed. + AbiHashResult { + uri: String, + /// `None` when the file is not in the daemon's index. + hash: Option, + }, } /// Provenance record for a Tier 3 ingestion source (typically a SCIP @@ -668,6 +748,21 @@ pub enum ClientMessage { QueryBlastRadius { symbol_uri: String, }, + /// Batch blast-radius for all symbols defined in the given files. + /// Optionally enriched with embedding-based semantic coupling. + /// Returns `BlastRadiusBatchResult`. + /// + /// When `min_score` is present, semantic enrichment is enabled: + /// each changed file's embedding is compared against the index and + /// neighbours above the threshold are included as `semantic_items`. 
+ /// Omit or set to `null` to skip semantic enrichment. + QueryBlastRadiusBatch { + changed_file_uris: Vec, + /// Minimum cosine similarity for semantic hits (default: 0.6). + /// Presence enables semantic enrichment. + #[serde(default)] + min_score: Option, + }, QueryWorkspaceSymbols { query: String, limit: Option, @@ -678,6 +773,12 @@ pub enum ClientMessage { QueryDeadSymbols { limit: Option, }, + /// Given a list of changed symbol URIs, return the file URIs that consume + /// those symbols and need re-verification (Kotlin IC model). + /// Returns `InvalidatedFilesResult`. + QueryInvalidatedFiles { + changed_symbol_uris: Vec, + }, AnnotationSet { symbol_uri: String, key: String, @@ -1094,6 +1195,33 @@ pub enum ClientMessage { RegisterTier3Source { source: Tier3Source, }, + + // ── v2.2 features ──────────────────────────────────────────────────── + /// Re-index stale files atomically. For each URI, if the file is + /// not indexed or was last indexed more than `max_age_seconds` ago, + /// it is re-read from disk and re-indexed. URIs within the threshold + /// are skipped. Pass `max_age_seconds = 0` to force re-index of all + /// listed URIs regardless of age. Returns `ReindexStaleResult`. + ReindexStale { + uris: Vec, + /// Files older than this threshold are re-indexed. + max_age_seconds: u64, + }, + /// Query the index status of multiple files in a single round-trip. + /// Equivalent to issuing `QueryFileStatus` once per URI inside a + /// `Batch`, but without the overhead of individual messages. + /// Returns `BatchFileStatusResult`. + BatchFileStatus { + uris: Vec, + }, + /// Query the ABI surface hash for a file. The hash is a stable hex + /// string computed over the file's exported symbols sorted by URI, + /// including their signatures and kinds. A change in hash means the + /// public interface changed — useful as a recompilation trigger. + /// Returns `AbiHashResult`. 
+ QueryAbiHash { + uri: String, + }, } impl ClientMessage { @@ -1111,9 +1239,11 @@ impl ClientMessage { "query_references", "query_hover", "query_blast_radius", + "query_blast_radius_batch", "query_workspace_symbols", "query_document_symbols", "query_dead_symbols", + "query_invalidated_files", "annotation_set", "annotation_get", "annotation_list", @@ -1155,6 +1285,9 @@ impl ClientMessage { "embed_text", "stream_context", "register_tier3_source", + "reindex_stale", + "batch_file_status", + "query_abi_hash", ] .iter() .map(|s| (*s).to_owned()) @@ -1180,9 +1313,11 @@ impl ClientMessage { ClientMessage::QueryReferences { .. } => "query_references", ClientMessage::QueryHover { .. } => "query_hover", ClientMessage::QueryBlastRadius { .. } => "query_blast_radius", + ClientMessage::QueryBlastRadiusBatch { .. } => "query_blast_radius_batch", ClientMessage::QueryWorkspaceSymbols { .. } => "query_workspace_symbols", ClientMessage::QueryDocumentSymbols { .. } => "query_document_symbols", ClientMessage::QueryDeadSymbols { .. } => "query_dead_symbols", + ClientMessage::QueryInvalidatedFiles { .. } => "query_invalidated_files", ClientMessage::AnnotationSet { .. } => "annotation_set", ClientMessage::AnnotationGet { .. } => "annotation_get", ClientMessage::AnnotationList { .. } => "annotation_list", @@ -1224,6 +1359,9 @@ impl ClientMessage { ClientMessage::EmbedText { .. } => "embed_text", ClientMessage::StreamContext { .. } => "stream_context", ClientMessage::RegisterTier3Source { .. } => "register_tier3_source", + ClientMessage::ReindexStale { .. } => "reindex_stale", + ClientMessage::BatchFileStatus { .. } => "batch_file_status", + ClientMessage::QueryAbiHash { .. } => "query_abi_hash", } } @@ -1246,6 +1384,7 @@ impl ClientMessage { | ClientMessage::PruneDeleted | ClientMessage::QueryStaleEmbeddings { .. } | ClientMessage::ExplainMatch { .. } + | ClientMessage::ReindexStale { .. 
} ) } } @@ -1390,6 +1529,146 @@ mod tests { assert!(tier3_sources.is_empty()); } + // ── v2.2 round-trip tests ───────────────────────────────────────── + + #[test] + fn reindex_stale_round_trips() { + let msg = ClientMessage::ReindexStale { + uris: vec!["file:///src/main.rs".into()], + max_age_seconds: 300, + }; + let rt = round_trip_client(&msg); + let ClientMessage::ReindexStale { + uris, + max_age_seconds, + } = rt + else { + panic!("wrong variant"); + }; + assert_eq!(uris, ["file:///src/main.rs"]); + assert_eq!(max_age_seconds, 300); + } + + #[test] + fn reindex_stale_not_batchable() { + assert!(!ClientMessage::ReindexStale { + uris: vec![], + max_age_seconds: 0 + } + .is_batchable()); + } + + #[test] + fn batch_file_status_round_trips() { + let msg = ClientMessage::BatchFileStatus { + uris: vec!["file:///a.rs".into(), "file:///b.rs".into()], + }; + let rt = round_trip_client(&msg); + let ClientMessage::BatchFileStatus { uris } = rt else { + panic!("wrong variant"); + }; + assert_eq!(uris.len(), 2); + } + + #[test] + fn batch_file_status_is_batchable() { + assert!(ClientMessage::BatchFileStatus { uris: vec![] }.is_batchable()); + } + + #[test] + fn query_abi_hash_round_trips() { + let msg = ClientMessage::QueryAbiHash { + uri: "file:///src/lib.rs".into(), + }; + let rt = round_trip_client(&msg); + let ClientMessage::QueryAbiHash { uri } = rt else { + panic!("wrong variant"); + }; + assert_eq!(uri, "file:///src/lib.rs"); + } + + #[test] + fn query_abi_hash_is_batchable() { + assert!(ClientMessage::QueryAbiHash { uri: String::new() }.is_batchable()); + } + + #[test] + fn reindex_stale_result_round_trips() { + let msg = ServerMessage::ReindexStaleResult { + reindexed: vec!["file:///src/a.rs".into()], + skipped: vec!["file:///src/b.rs".into()], + }; + let rt = round_trip_server(&msg); + let ServerMessage::ReindexStaleResult { reindexed, skipped } = rt else { + panic!("wrong variant"); + }; + assert_eq!(reindexed, ["file:///src/a.rs"]); + assert_eq!(skipped, 
["file:///src/b.rs"]); + } + + #[test] + fn batch_file_status_result_round_trips() { + let msg = ServerMessage::BatchFileStatusResult { + entries: vec![FileStatusEntry { + uri: "file:///src/main.rs".into(), + indexed: true, + has_embedding: false, + age_seconds: Some(42), + embedding_model: None, + }], + }; + let rt = round_trip_server(&msg); + let ServerMessage::BatchFileStatusResult { entries } = rt else { + panic!("wrong variant"); + }; + assert_eq!(entries.len(), 1); + assert!(entries[0].indexed); + assert_eq!(entries[0].age_seconds, Some(42)); + } + + #[test] + fn abi_hash_result_round_trips() { + let msg = ServerMessage::AbiHashResult { + uri: "file:///src/lib.rs".into(), + hash: Some("deadbeef".into()), + }; + let rt = round_trip_server(&msg); + let ServerMessage::AbiHashResult { uri, hash } = rt else { + panic!("wrong variant"); + }; + assert_eq!(uri, "file:///src/lib.rs"); + assert_eq!(hash.as_deref(), Some("deadbeef")); + } + + #[test] + fn nearest_item_embedding_model_round_trips() { + let msg = ServerMessage::NearestResult { + results: vec![NearestItem { + uri: "file:///src/auth.rs".into(), + score: 0.95, + embedding_model: Some("text-embedding-3-small".into()), + }], + }; + let rt = round_trip_server(&msg); + let ServerMessage::NearestResult { results } = rt else { + panic!("wrong variant"); + }; + assert_eq!( + results[0].embedding_model.as_deref(), + Some("text-embedding-3-small") + ); + } + + #[test] + fn nearest_item_missing_embedding_model_deserializes_as_none() { + let json = r#"{"type":"nearest_result","results":[{"uri":"file:///a.rs","score":0.9}]}"#; + let msg: ServerMessage = serde_json::from_str(json).unwrap(); + let ServerMessage::NearestResult { results } = msg else { + panic!("wrong variant"); + }; + assert!(results[0].embedding_model.is_none()); + } + /// Drift guard: every tag produced by [`ClientMessage::variant_tag`] /// must also appear in [`ClientMessage::supported_messages`], and /// the two lists must be the same size. 
Combined with the @@ -1439,12 +1718,19 @@ mod tests { ClientMessage::QueryBlastRadius { symbol_uri: String::new(), }, + ClientMessage::QueryBlastRadiusBatch { + changed_file_uris: vec![], + min_score: None, + }, ClientMessage::QueryWorkspaceSymbols { query: String::new(), limit: None, }, ClientMessage::QueryDocumentSymbols { uri: String::new() }, ClientMessage::QueryDeadSymbols { limit: None }, + ClientMessage::QueryInvalidatedFiles { + changed_symbol_uris: vec![], + }, ClientMessage::AnnotationSet { symbol_uri: String::new(), key: String::new(), @@ -1614,6 +1900,12 @@ mod tests { imported_at_ms: 0, }, }, + ClientMessage::ReindexStale { + uris: vec![], + max_age_seconds: 0, + }, + ClientMessage::BatchFileStatus { uris: vec![] }, + ClientMessage::QueryAbiHash { uri: String::new() }, ]; let supported = ClientMessage::supported_messages(); @@ -1756,6 +2048,7 @@ mod tests { vec![NearestItem { uri: "file:///a.rs".into(), score: 0.9, + embedding_model: None, }], vec![], ], @@ -1942,6 +2235,7 @@ mod tests { outliers: vec![NearestItem { uri: "file:///src/billing.go".into(), score: 0.12, + embedding_model: None, }], }; let rt = round_trip_server(&msg); @@ -2148,6 +2442,7 @@ mod tests { moving_toward: vec![NearestItem { uri: "file:///src/auth.rs".into(), score: 0.91, + embedding_model: None, }], }; let rt = round_trip_server(&msg); diff --git a/bindings/rust/tests/integration.rs b/bindings/rust/tests/integration.rs index 5285bce..19a1e8d 100644 --- a/bindings/rust/tests/integration.rs +++ b/bindings/rust/tests/integration.rs @@ -6,7 +6,10 @@ use tokio::net::UnixStream; use lip_core::daemon::LipDaemon; use lip_core::query_graph::{ClientMessage, ErrorCode, ServerMessage}; -use lip_core::schema::{Action, IndexingState, OwnedDocument}; +use lip_core::schema::{ + Action, IndexingState, OwnedDocument, OwnedOccurrence, OwnedRange, OwnedSymbolInfo, Role, + SymbolKind, +}; // ─── Framing helpers (client side) ─────────────────────────────────────────── @@ -886,3 +889,145 @@ async fn 
unknown_variant_returns_unknown_message_and_keeps_connection() { task.abort(); let _ = task.await; } + +// ─── SCIP import: pre-computed symbols via Delta ───────────────────────────── + +/// Regression test for the SCIP import path. When a client sends a Delta with +/// `source_text: None` and pre-computed `symbols` + `occurrences`, the daemon +/// must store them verbatim (via `upsert_file_precomputed`) and make them +/// queryable through both `WorkspaceSymbols` and `QueryDefinition`. +#[tokio::test] +async fn scip_import_precomputed_symbols_searchable() { + let dir = tempfile::tempdir().expect("tempdir"); + let socket_path = dir.path().join("lip_scip.sock"); + + let daemon = LipDaemon::new(&socket_path); + let task = tokio::spawn(async move { daemon.run().await.ok() }); + tokio::time::sleep(Duration::from_millis(20)).await; + + let mut client = UnixStream::connect(&socket_path).await.expect("connect"); + + // ── Build a pre-computed document (SCIP-style: no source_text) ─────────── + let uri = "lip://local/dep@1.0/scip_mod.rs"; + let symbol_uri = format!("{uri}#ScipWidget"); + + let doc = OwnedDocument { + uri: uri.to_owned(), + content_hash: "cafebabe01234567".to_owned(), + language: "rust".to_owned(), + symbols: vec![OwnedSymbolInfo { + uri: symbol_uri.clone(), + display_name: "ScipWidget".to_owned(), + kind: SymbolKind::Class, + documentation: Some("A widget from SCIP import.".to_owned()), + signature: Some("pub struct ScipWidget".to_owned()), + confidence_score: 100, + relationships: vec![], + runtime_p99_ms: None, + call_rate_per_s: None, + taint_labels: vec![], + blast_radius: 0, + is_exported: true, + }], + occurrences: vec![OwnedOccurrence { + symbol_uri: symbol_uri.clone(), + range: OwnedRange { + start_line: 0, + start_char: 11, + end_line: 0, + end_char: 21, + }, + confidence_score: 100, + role: Role::Definition, + override_doc: None, + }], + merkle_path: uri.to_owned(), + edges: vec![], + source_text: None, // <-- key: SCIP imports have no source + 
}; + + // ── Send the Delta ─────────────────────────────────────────────────────── + send( + &mut client, + &ClientMessage::Delta { + seq: 100, + action: Action::Upsert, + document: doc, + }, + ) + .await + .expect("send scip delta"); + + let resp = recv(&mut client).await.expect("recv scip delta ack"); + match resp { + ServerMessage::DeltaAck { seq, accepted, .. } => { + assert_eq!(seq, 100); + assert!(accepted, "daemon rejected pre-computed delta"); + } + other => panic!("expected DeltaAck, got {other:?}"), + } + + // ── WorkspaceSymbols: the pre-computed symbol must be discoverable ─────── + send( + &mut client, + &ClientMessage::QueryWorkspaceSymbols { + query: "ScipWidget".to_owned(), + limit: Some(10), + }, + ) + .await + .expect("send workspace symbols query"); + + let resp = recv(&mut client).await.expect("recv workspace symbols"); + match resp { + ServerMessage::WorkspaceSymbolsResult { symbols } => { + assert!( + !symbols.is_empty(), + "expected ScipWidget in workspace symbols, got none" + ); + assert!( + symbols.iter().any(|s| s.display_name == "ScipWidget"), + "ScipWidget not found in results: {symbols:?}" + ); + } + other => panic!("expected WorkspaceSymbolsResult, got {other:?}"), + } + + // ── QueryDefinition: the Definition-role occurrence must resolve ───────── + send( + &mut client, + &ClientMessage::QueryDefinition { + uri: uri.to_owned(), + line: 0, + col: 15, // inside the occurrence range [11..21] + }, + ) + .await + .expect("send query definition"); + + let resp = recv(&mut client).await.expect("recv definition result"); + match resp { + ServerMessage::DefinitionResult { + symbol, + location_uri, + .. 
+ } => { + assert!( + symbol.is_some(), + "expected symbol info for ScipWidget, got None" + ); + let sym = symbol.unwrap(); + assert_eq!(sym.display_name, "ScipWidget"); + assert_eq!( + location_uri.as_deref(), + Some(uri), + "definition should resolve to the same file" + ); + } + other => panic!("expected DefinitionResult, got {other:?}"), + } + + // ── Cleanup ────────────────────────────────────────────────────────────── + task.abort(); + let _ = task.await; +} diff --git a/docs/LIP_SPEC.mdx b/docs/LIP_SPEC.mdx index 3ecbc18..b36707d 100644 --- a/docs/LIP_SPEC.mdx +++ b/docs/LIP_SPEC.mdx @@ -662,6 +662,7 @@ lip.query.definition(uri: string, position: Range) → SymbolInfo lip.query.references(symbol_uri: string, limit?: int) → [Occurrence] lip.query.hover(uri: string, position: Range) → HoverResult lip.query.blast_radius(symbol_uri: string) → BlastRadiusResult +lip.query.blast_radius_batch(changed_file_uris: [string], min_score?: f32) → BlastRadiusBatchResult lip.query.subgraph(symbol_uri: string, depth: int) → SymbolGraph lip.query.taint(symbol_uri: string) → [TaintPath] lip.query.workspace_symbols(query: string, limit?: int) → [SymbolInfo] @@ -758,6 +759,48 @@ lip.query.blast_radius(symbol_uri) → { Available as a pre-commit hook: "Changing this interface will affect 47 call sites across 12 files and 3 microservices." +#### 8.1.1 Batch blast radius with semantic enrichment + +``` +lip.query.blast_radius_batch(changed_file_uris, min_score?) → { + results: [{ + symbol_uri: string, + direct_dependents: int, + transitive_dependents: int, + affected_files: [string], + direct_items: [ImpactItem], + transitive_items: [ImpactItem], + risk_level: "low" | "medium" | "high", + truncated: bool, + semantic_items: [SemanticItem], // only when min_score is set + }] +} +``` + +Accepts changed **files** and resolves exported symbols server-side — one round-trip +regardless of symbol count. 
Only symbols with kind `Function`, `Method`, `Class`, +`Interface`, `Constructor`, or `Macro` are resolved; variables, constants, and +parameters are excluded to avoid framework-wiring noise. + +When `min_score` is present, the daemon runs each changed file's embedding against +the file embedding index and includes neighbours above the threshold as +`semantic_items`. Each semantic item carries a `source` field: + +- **`"semantic"`** — found only via embedding similarity, not in the structural + call graph. Catches dynamic dispatch, macro-expanded call sites, and template + instantiation chains that SCIP's static graph cannot resolve. +- **`"both"`** — found in both the structural graph and the embedding search, + confirming a structural edge with semantic evidence. + +**Embedding scope:** Semantic enrichment currently operates at file-level granularity +(searching `file_embeddings`). Per-function chunked embeddings are not yet integrated +into this path; when they are, the `symbol_uri` field in `SemanticItem` (currently +empty) will carry the matched symbol, with no wire format change required. + +**Consumer contract (CKB):** `UniqueCallerCount` (which drives review thresholds) +stays structural-only. Semantic callers are additive — they inform the human reviewer +("8 callers (+3 semantically coupled)") but do not inflate risk scores. + ### 8.2 Taint tracking Symbols can be annotated with `taint_labels` (e.g. `["PII", "UNSAFE_IO"]`). 
LIP diff --git a/tools/lip-cli/Cargo.toml b/tools/lip-cli/Cargo.toml index 13fa242..fe17f30 100644 --- a/tools/lip-cli/Cargo.toml +++ b/tools/lip-cli/Cargo.toml @@ -18,7 +18,7 @@ name = "lip" path = "src/main.rs" [dependencies] -lip = { package = "lip-core", path = "../../bindings/rust", version = "2.1.1" } +lip = { package = "lip-core", path = "../../bindings/rust", version = "2.2.0" } clap = { version = "4", features = ["derive", "env"] } tokio = { version = "1", features = ["full"] } tower-lsp = "0.20" diff --git a/tools/lip-cli/src/cmd/export.rs b/tools/lip-cli/src/cmd/export.rs index 2c952b9..dd1e453 100644 --- a/tools/lip-cli/src/cmd/export.rs +++ b/tools/lip-cli/src/cmd/export.rs @@ -91,9 +91,11 @@ fn convert_document(doc: OwnedDocument) -> scip::Document { let occurrences: Vec = doc.occurrences.iter().map(convert_occurrence).collect(); - // Note: scip::Document has no `text` field in the generated proto; source - // text is not part of the SCIP wire format at this schema version. - let _ = doc.source_text; // present in LIP, absent in SCIP + // SCIP has no representation for source text or CPG edges; both are + // LIP-only. A SCIP round-trip (import → export) is therefore lossy for + // call-graph / blast-radius data. 
+ let _ = doc.source_text; + let _ = doc.edges; scip::Document { language: doc.language, relative_path: uri_to_relative_path(&doc.uri), @@ -111,7 +113,7 @@ fn convert_symbol_info(sym: &OwnedSymbolInfo) -> scip::SymbolInformation { is_reference: r.is_reference, is_implementation: r.is_implementation, is_type_definition: r.is_type_definition, - is_override: r.is_override, + is_definition: r.is_override, }) .collect(); diff --git a/tools/lip-cli/src/proto/scip.proto b/tools/lip-cli/src/proto/scip.proto index 0106348..ef77107 100644 --- a/tools/lip-cli/src/proto/scip.proto +++ b/tools/lip-cli/src/proto/scip.proto @@ -131,7 +131,7 @@ message Relationship { bool is_reference = 2; bool is_implementation = 3; bool is_type_definition = 4; - bool is_override = 5; + bool is_definition = 5; } message Occurrence { diff --git a/tools/lip-registry/Cargo.toml b/tools/lip-registry/Cargo.toml index ba4c695..72c5219 100644 --- a/tools/lip-registry/Cargo.toml +++ b/tools/lip-registry/Cargo.toml @@ -18,7 +18,7 @@ name = "lip-registry" path = "src/main.rs" [dependencies] -lip = { package = "lip-core", path = "../../bindings/rust", version = "2.1.1" } +lip = { package = "lip-core", path = "../../bindings/rust", version = "2.2.0" } axum = "0.7" tokio = { version = "1", features = ["full"] } tower-http = { version = "0.5", features = ["fs", "trace"] } diff --git a/website/src/pages/docs/daemon.mdx b/website/src/pages/docs/daemon.mdx index bf6e527..de908c1 100644 --- a/website/src/pages/docs/daemon.mdx +++ b/website/src/pages/docs/daemon.mdx @@ -93,6 +93,7 @@ Each connection handles one request/response pair. 
The protocol is synchronous p | `QueryReferences` | Find all references to a symbol URI | | `QueryHover` | Hover info at (uri, line, col) | | `QueryBlastRadius` | Blast radius for a symbol URI | +| `QueryBlastRadiusBatch` | Batch blast radius for changed files, with optional semantic enrichment (§8.1.1) | | `QueryWorkspaceSymbols` | Search symbols by name | | `QueryDocumentSymbols` | List symbols in a file | | `QueryDeadSymbols` | Find unreferenced symbols |