Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 154 additions & 0 deletions .github/workflows/scip-import.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
name: SCIP Import

on:
  workflow_call:
    inputs:
      scip-tool:
        description: "SCIP indexer to run"
        required: true
        type: string # rust | typescript | python
      scip-file-path:
        description: "Path to the SCIP index file"
        required: false
        type: string
        default: ".scip/index.scip"
      daemon-socket:
        description: "Unix socket path for the LIP daemon"
        required: false
        type: string
        default: "/tmp/lip-ci.sock"
      confidence:
        description: "Confidence score for imported symbols (1-100)"
        required: false
        type: number
        default: 100

  workflow_dispatch:
    inputs:
      scip-tool:
        description: "SCIP indexer to run"
        required: true
        type: choice
        options:
          - rust
          - typescript
          - python
      scip-file-path:
        description: "Path to the SCIP index file"
        required: false
        type: string
        default: ".scip/index.scip"
      daemon-socket:
        description: "Unix socket path for the LIP daemon"
        required: false
        type: string
        default: "/tmp/lip-ci.sock"
      confidence:
        description: "Confidence score for imported symbols (1-100)"
        required: false
        type: number
        default: 100

# Least-privilege token: nothing in this workflow writes back to the repo.
permissions:
  contents: read

env:
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: 0

jobs:
  scip-import:
    name: SCIP Import (${{ inputs.scip-tool }})
    runs-on: ubuntu-latest
    # Expose caller-controlled inputs as env vars once, and reference them as
    # "$VAR" in shell steps instead of inline ${{ }} interpolation. This
    # hardens against script injection and against paths containing shell
    # metacharacters or quotes.
    env:
      SCIP_FILE_PATH: ${{ inputs.scip-file-path }}
      DAEMON_SOCKET: ${{ inputs.daemon-socket }}
      IMPORT_CONFIDENCE: ${{ inputs.confidence }}
    steps:
      - uses: actions/checkout@v4

      # ── Install lip CLI ──────────────────────────────────────────────────────
      - uses: actions/cache@v4
        id: lip-cache
        with:
          path: ~/.cargo/bin/lip
          key: lip-cli-${{ runner.os }}-${{ hashFiles('.github/workflows/scip-import.yml') }}

      - name: Install lip CLI
        if: steps.lip-cache.outputs.cache-hit != 'true'
        run: cargo install lip-cli --locked

      # ── Start LIP daemon ─────────────────────────────────────────────────────
      - name: Start LIP daemon
        run: |
          lip daemon --socket "$DAEMON_SOCKET" &
          # Capture the PID immediately after `&` and persist it for the
          # cleanup step via GITHUB_ENV.
          echo "DAEMON_PID=$!" >> "$GITHUB_ENV"
          # Poll for the socket: 30 iterations x 0.2 s = 6 s budget.
          for _ in $(seq 1 30); do
            [ -S "$DAEMON_SOCKET" ] && break
            sleep 0.2
          done
          if [ ! -S "$DAEMON_SOCKET" ]; then
            echo "::error::Daemon socket did not appear within 6s"
            exit 1
          fi

      # ── Install and run SCIP indexer ─────────────────────────────────────────
      - name: Install Rust toolchain
        if: inputs.scip-tool == 'rust'
        uses: dtolnay/rust-toolchain@stable
        with:
          components: rust-analyzer

      - name: Run scip-rust indexer
        if: inputs.scip-tool == 'rust'
        run: |
          mkdir -p "$(dirname "$SCIP_FILE_PATH")"
          rust-analyzer scip .
          # rust-analyzer always writes ./index.scip; relocate it when the
          # caller requested a different path.
          if [ "$SCIP_FILE_PATH" != "index.scip" ] && [ -f index.scip ]; then
            mv index.scip "$SCIP_FILE_PATH"
          fi

      - name: Setup Node.js
        if: inputs.scip-tool == 'typescript'
        uses: actions/setup-node@v4
        with:
          node-version: "20"

      - name: Run scip-typescript indexer
        if: inputs.scip-tool == 'typescript'
        run: |
          npm install -g @sourcegraph/scip-typescript
          mkdir -p "$(dirname "$SCIP_FILE_PATH")"
          scip-typescript index --output "$SCIP_FILE_PATH"

      - name: Setup Python
        if: inputs.scip-tool == 'python'
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Run scip-python indexer
        if: inputs.scip-tool == 'python'
        run: |
          # NOTE(review): Sourcegraph distributes scip-python via npm
          # (@sourcegraph/scip-python); confirm the PyPI package installed
          # here is the intended indexer and provides the `scip-python` CLI.
          pip install scip-python
          mkdir -p "$(dirname "$SCIP_FILE_PATH")"
          scip-python index --output "$SCIP_FILE_PATH"

      # ── Import into daemon ───────────────────────────────────────────────────
      - name: Import SCIP index into LIP daemon
        run: |
          lip import \
            --from-scip "$SCIP_FILE_PATH" \
            --push-to-daemon "$DAEMON_SOCKET" \
            --confidence "$IMPORT_CONFIDENCE"

      # ── Verify import ────────────────────────────────────────────────────────
      - name: Verify index status
        run: |
          echo '[{"type":"query_index_status"}]' | \
            lip query --socket "$DAEMON_SOCKET" batch

      # ── Cleanup ──────────────────────────────────────────────────────────────
      - name: Stop daemon
        if: always()
        run: |
          if [ -n "$DAEMON_PID" ]; then
            kill "$DAEMON_PID" 2>/dev/null || true
            wait "$DAEMON_PID" 2>/dev/null || true
          fi
33 changes: 33 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,41 @@ All notable changes to this project are documented here.

## [Unreleased]

---

## [2.2.0] — 2026-04-21

### Added

- **`NearestItem.embedding_model`** — every nearest-neighbour hit now carries the model name that produced its stored embedding. Field is optional / `skip_serializing_if = None`; older clients see no change. Populated by `nearest_by_vector`, `nearest_symbol_by_vector`, and `outliers`. Useful for debugging mixed-model indexes and confirming which model was used for a specific result.

- **Function-level blast radius** (`QueryBlastRadiusBatch`) — semantic enrichment now uses per-symbol embeddings when available. If `EmbeddingBatch` has been called with `lip://` URIs (function-level chunks), `semantic_items[].symbol_uri` is populated and results are at function granularity. Falls back to file-level embeddings when no symbol embeddings exist, so the upgrade is transparent.

- **`ReindexStale`** — atomic "reindex if stale" operation. Accepts `uris` and `max_age_seconds`; re-reads from disk only the URIs that are not indexed or whose last-indexed timestamp exceeds the threshold. Returns `ReindexStaleResult { reindexed, skipped }`. Pass `max_age_seconds = 0` to force unconditional reindex. Replaces the manual `QueryFileStatus` → `ReindexFiles` race.

- **`BatchFileStatus`** — query index status for multiple files in one round-trip. Equivalent to issuing `QueryFileStatus` inside a `Batch`, but without message-per-file overhead. Batchable. Returns `BatchFileStatusResult { entries: Vec<FileStatusEntry> }`.

- **`QueryAbiHash`** — stable hex hash (SHA-256) over a file's exported API surface (exported symbol URIs + kinds + signatures, sorted). A change in hash means the public interface changed — safe as a downstream recompilation or re-verification trigger (Kotlin IC model). Returns `AbiHashResult { uri, hash: Option<String> }`. Batchable.

- **Tier 1.5 Datalog inference** — `LipDatabase::run_tier1_5_inference()` runs a fixed-point inference loop applying two rules: (1) if every direct caller of a symbol is at confidence ≥ 80 (Tier 2 / SCIP quality), raise the callee to confidence 65; (2) exported symbols with no local callers are raised by 5 points (capped at 65). Never lowers confidence; never exceeds the Tier 1.5 ceiling, leaving headroom for Tier 2.

- **Tier 2 backoff recovery** — language server backends now recover from transient crashes with exponential backoff (2–300 s, up to 8 failures) instead of being permanently disabled for the session lifetime. `disabled_*` flags are kept for hard failures (binary not installed). A `BackoffState` struct tracks `failure_count` and `available_after` per backend. Tests: `backoff_fresh_is_available`, `backoff_fail_makes_unavailable`, `backoff_reset_clears_state`, `backoff_permanent_after_8_failures`, `backoff_not_permanent_before_8_failures`.

- **`FileStatusEntry`** — new public struct carrying the same fields as `FileStatusResult` but suitable for use inside `BatchFileStatusResult`.

- **`QueryBlastRadiusBatch`** — batch blast radius for all exported symbols in changed files, with optional semantic enrichment via file embeddings. Accepts `changed_file_uris` and optional `min_score` threshold. Resolves symbols server-side (filtered to Function, Method, Class, Interface, Constructor, Macro), runs structural BFS per symbol, and when `min_score` is set, augments results with cosine-similarity neighbours from the file embedding index. Each semantic hit carries a `source` field (`"semantic"` or `"both"`) so consumers can distinguish certainty tiers. Spec §8.1.1.
- **`QueryInvalidatedFiles`** — name-based dependency tracking query. Given a set of changed symbol URIs, returns file URIs that consumed those names externally (Kotlin-IC inspired). Enables symbol-level re-verification without full reindex.
- **`JournalEntry::UpsertFilePrecomputed`** — journal variant that persists pre-computed symbols, occurrences, and CPG edges from SCIP imports. Fixes data loss on daemon restart for SCIP-imported files.

### Fixed

- **SCIP proto field numbers** — `SymbolInformation.relationships` (2→4), `kind` (4→5), `display_name` (5→6) aligned with upstream SCIP. Fixes protobuf decode crash (`LengthDelimited where Varint expected`) when importing any index produced by a spec-compliant SCIP emitter.
- **SCIP proto `Relationship.is_override`** → `is_definition` to match upstream field 5 semantics.
- **SCIP import pre-computed symbol persistence** — Delta handler now routes pre-computed documents through `upsert_file_precomputed`, populating sym_cache, occ_cache, def_index, name_to_symbols, and call-edge indexes. Previously, SCIP-imported symbols were silently dropped.
- **Journal replay for SCIP imports** — pre-computed symbols now survive daemon restart via `UpsertFilePrecomputed` journal entry.
- **Merkle stale_files** — uses stored `content_hash` instead of hashing empty text for pre-computed files. Fixes infinite re-sync loop.
- **file_source_text** — falls back to disk read for precomputed `file://` URIs. Fixes stream_context, embeddings, and explain-match for SCIP-imported files.

- **`EndStreamReason::CursorOutOfRange`** and **`EndStreamReason::FileNotIndexed`** — split the previously-conflated `Error + "cursor_out_of_range"` emission into two typed reasons. Before, a cursor past EOF and a URI the daemon had never indexed both surfaced as `reason: error, error: "cursor_out_of_range"`; clients could not distinguish "user gave bad coordinates" from "daemon has nothing for this path." Now:
- `CursorOutOfRange` — the file is indexed but the cursor line is outside its range. Error message reports the actual line count.
- `FileNotIndexed` — the daemon has no record of the URI. Error message names the URI. Callers should upsert or reindex, then retry.
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ members = [
]

[workspace.package]
version = "2.1.1"
version = "2.2.0"
edition = "2021"
rust-version = "1.78"
authors = ["Lisa Welsch <lisa@nyxcore.cloud>"]
Expand Down
126 changes: 123 additions & 3 deletions bindings/rust/src/daemon/journal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use serde::{Deserialize, Serialize};
use tracing::warn;

use crate::query_graph::LipDatabase;
use crate::schema::OwnedAnnotationEntry;
use crate::schema::{OwnedAnnotationEntry, OwnedGraphEdge, OwnedOccurrence, OwnedSymbolInfo};

/// Compact the journal when it has accumulated this many entries.
/// Below this threshold the overhead of compaction isn't worth it.
Expand All @@ -49,6 +49,14 @@ pub enum JournalEntry {
text: String,
language: String,
},
UpsertFilePrecomputed {
uri: String,
language: String,
content_hash: String,
symbols: Vec<OwnedSymbolInfo>,
occurrences: Vec<OwnedOccurrence>,
edges: Vec<OwnedGraphEdge>,
},
RemoveFile {
uri: String,
},
Expand Down Expand Up @@ -168,9 +176,25 @@ pub fn compact(path: &Path, db: &LipDatabase) -> anyhow::Result<usize> {
})?;
}

// One UpsertFile per tracked file.
// One UpsertFile (or UpsertFilePrecomputed) per tracked file.
for uri in db.tracked_uris() {
if let (Some(text), Some(lang)) = (db.file_text(&uri), db.file_language(&uri)) {
let Some(lang) = db.file_language(&uri) else {
continue;
};
if db.is_precomputed(&uri) {
let content_hash = db.file_content_hash(&uri).unwrap_or_default().to_owned();
let symbols = db.cached_symbols(&uri).as_ref().clone();
let occurrences = db.cached_occurrences(&uri).as_ref().clone();
let edges = db.file_call_edges_raw(&uri);
write_entry(&JournalEntry::UpsertFilePrecomputed {
uri,
language: lang.to_owned(),
content_hash,
symbols,
occurrences,
edges,
})?;
} else if let Some(text) = db.file_text(&uri) {
write_entry(&JournalEntry::UpsertFile {
uri,
text: text.to_owned(),
Expand Down Expand Up @@ -208,6 +232,23 @@ pub fn replay(entries: &[JournalEntry], db: &mut LipDatabase) {
} => {
db.upsert_file(uri.clone(), text.clone(), language.clone());
}
JournalEntry::UpsertFilePrecomputed {
uri,
language,
content_hash,
symbols,
occurrences,
edges,
} => {
db.upsert_file_precomputed(
uri.clone(),
language.clone(),
content_hash.clone(),
symbols.clone(),
occurrences.clone(),
edges.clone(),
);
}
JournalEntry::RemoveFile { uri } => {
db.remove_file(uri);
}
Expand Down Expand Up @@ -355,6 +396,85 @@ mod tests {
assert_eq!(db2.current_merkle_root(), Some("abc"));
}

#[test]
fn precomputed_survives_compact_replay() {
    use crate::schema::{OwnedOccurrence, OwnedRange, OwnedSymbolInfo, Role, SymbolKind};

    let journal_file = NamedTempFile::new().unwrap();
    let journal_path = journal_file.path().to_owned();

    // Fixture: one precomputed function symbol plus its single definition
    // occurrence, shaped like the output of a SCIP import.
    let symbol = OwnedSymbolInfo {
        uri: "lip://local/lib.rs#Foo".into(),
        display_name: "Foo".into(),
        kind: SymbolKind::Function,
        documentation: None,
        signature: None,
        confidence_score: 90,
        relationships: vec![],
        runtime_p99_ms: None,
        call_rate_per_s: None,
        taint_labels: vec![],
        blast_radius: 0,
        is_exported: false,
    };
    let occurrence = OwnedOccurrence {
        symbol_uri: "lip://local/lib.rs#Foo".into(),
        range: OwnedRange {
            start_line: 0,
            start_char: 0,
            end_line: 0,
            end_char: 3,
        },
        confidence_score: 90,
        role: Role::Definition,
        override_doc: None,
    };

    // Persist a single UpsertFilePrecomputed entry; the scope closes the
    // journal handle before we reopen for replay.
    {
        let (mut journal, _) = Journal::open(&journal_path).unwrap();
        journal
            .append(&JournalEntry::UpsertFilePrecomputed {
                uri: "file:///project/lib.rs".into(),
                language: "rust".into(),
                content_hash: "abc123".into(),
                symbols: vec![symbol],
                occurrences: vec![occurrence],
                edges: vec![],
            })
            .unwrap();
    }

    // First replay: the precomputed file and its symbol must materialize.
    let (_, entries) = Journal::open(&journal_path).unwrap();
    let mut first_db = LipDatabase::new();
    replay(&entries, &mut first_db);
    assert_eq!(first_db.file_count(), 1);
    assert!(first_db.is_precomputed("file:///project/lib.rs"));
    assert_eq!(
        first_db.file_symbols("file:///project/lib.rs").len(),
        1,
        "precomputed symbol must survive replay"
    );

    // Compact, then replay the compacted journal into a fresh database.
    compact(&journal_path, &first_db).unwrap();
    let (_, compacted) = Journal::open(&journal_path).unwrap();
    let mut second_db = LipDatabase::new();
    replay(&compacted, &mut second_db);
    assert_eq!(second_db.file_count(), 1);
    assert!(second_db.is_precomputed("file:///project/lib.rs"));
    let recovered = second_db.file_symbols("file:///project/lib.rs");
    assert_eq!(
        recovered.len(),
        1,
        "precomputed symbol must survive compact + replay"
    );
    assert_eq!(recovered[0].display_name, "Foo");

    // The symbol must also remain reachable through name search.
    assert_eq!(
        second_db.workspace_symbols("Foo", 10).len(),
        1,
        "precomputed symbol must be searchable after compact + replay"
    );
}

#[test]
fn open_append_creates_file_if_absent() {
let dir = tempfile::tempdir().unwrap();
Expand Down
Loading
Loading