From 2e42bdcf45a9f01cd81730c1b8cb1de578a6fda6 Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 12:09:47 +0200 Subject: [PATCH 01/19] fix: recommend sub-agent dispatch for large-scope lifting (v0.8.3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude Code (and other MCP clients) were processing 1500-entity lifting jobs directly in the foreground because the server's NEXT STEP guidance said "Call get_entities_for_lifting(scope='*') to start lifting" with no awareness of scale. Each batch returns ~12K tokens of source — 150 batches burns ~1.8M tokens before the agent can do anything else. The guidance now branches on remaining count: - <100 entities: lift directly (same as before). - >=100 entities: NEXT STEP describes the delegation pattern and gives a Claude Code Task() example, while calling out that Gemini CLI, Codex, Cursor, opencode, Windsurf etc. have their own equivalents. get_entities_for_lifting also emits a dispatch note on batch 0 when >=10 batches are queued, in case the caller skipped lifting_status. Runtime-agnostic phrasing throughout — the server doesn't know if the connected agent is Claude, Gemini, GPT, or anything else. It describes the pattern ("delegate to a sub-agent or cheaper model") and provides Claude's Task()+Haiku syntax as one example among several runtimes. server_instructions.md large-scope guidance also simplified from "parallel subagents per area" to "one sub-agent drains it" — same result, simpler caller reasoning. Bumps to v0.8.3. 651 tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- .gemini/extensions/rpg/gemini-extension.json | 2 +- CHANGELOG.md | 22 ++++++++ Cargo.toml | 2 +- .../src/prompts/server_instructions.md | 50 +++++++++++++------ crates/rpg-mcp/src/server.rs | 28 ++++++++++- crates/rpg-mcp/src/tools.rs | 18 +++++++ npm/package.json | 2 +- server.json | 4 +- 8 files changed, 105 insertions(+), 23 deletions(-) diff --git a/.gemini/extensions/rpg/gemini-extension.json b/.gemini/extensions/rpg/gemini-extension.json index 6d05a36..fb5093e 100644 --- a/.gemini/extensions/rpg/gemini-extension.json +++ b/.gemini/extensions/rpg/gemini-extension.json @@ -1,6 +1,6 @@ { "name": "rpg-encoder", - "version": "0.8.2", + "version": "0.8.3", "description": "Build and query semantic code graphs (Repository Planning Graphs) for AI-assisted code understanding. Provides entity search, dependency exploration, and autonomous LLM-driven semantic lifting.", "mcpServers": { "rpg-encoder": { diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d9c6a0..3c3ed2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,28 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/), and this project adheres to [Semantic Versioning](https://semver.org/). +## [0.8.3] - 2026-04-14 + +### Changed + +- **`lifting_status` NEXT STEP now recommends sub-agent dispatch when the + remaining entity count is ≥100.** The previous guidance was to call + `get_entities_for_lifting(scope="*")` directly in the foreground regardless + of size — which silently burns ~12K tokens per batch of the caller's + context window. On a repo with 1500 unlifted entities that's ~150 batches + = ~1.8M tokens of grunt work before any real help begins. +- **`get_entities_for_lifting` batch-0 response now includes a dispatch note + when 10+ batches are queued**, in case the caller skipped `lifting_status` + and jumped straight to pulling batches. 
+- **Guidance is runtime-agnostic.** Mentions Claude Code's `Task(model="haiku")` + as one example but calls out Gemini CLI, Codex, Cursor, opencode, and + Windsurf as needing their own equivalent dispatch mechanism. Falls back to + `rpg-encoder lift --provider anthropic` (CLI, uses external API key) when + the runtime has no sub-agent concept at all. +- `server_instructions.md` large-scope guidance simplified from "parallel + subagents per area" to "one sub-agent drains it" — same result, simpler + caller reasoning. + ## [0.8.2] - 2026-04-14 ### Added diff --git a/Cargo.toml b/Cargo.toml index 209878e..2e12004 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ members = [ ] [workspace.package] -version = "0.8.2" +version = "0.8.3" edition = "2024" license = "MIT" authors = ["userFRM"] diff --git a/crates/rpg-mcp/src/prompts/server_instructions.md b/crates/rpg-mcp/src/prompts/server_instructions.md index 6375bcc..1674a96 100644 --- a/crates/rpg-mcp/src/prompts/server_instructions.md +++ b/crates/rpg-mcp/src/prompts/server_instructions.md @@ -82,22 +82,40 @@ Process all batches in the current conversation: - Call `finalize_lifting` then `get_files_for_synthesis` + `submit_file_syntheses`. - Call `build_semantic_hierarchy` + `submit_hierarchy`. -### Large scope (100+ entities): Dispatch parallel subagents -A single conversation CANNOT lift a large repo — context will overflow. -Instead, split the work across fresh subagent conversations: - -1. Call `lifting_status` to see per-area coverage and unlifted files. -2. For each area, dispatch a **foreground** subagent (Task tool, subagent_type="general-purpose"): - - Each subagent scope: a file glob like `"crates/rpg-core/**"` or `"src/auth/**"` - - Each subagent runs: get_entities_for_lifting -> analyze -> submit_lift_results (loop) - - Each subagent gets a FRESH context window — no accumulation across areas -3. After all subagents complete, call `lifting_status` to verify coverage. -4. Call `finalize_lifting` then `get_files_for_synthesis` + `submit_file_syntheses`. -5. Call `build_semantic_hierarchy` + `submit_hierarchy`. - -**Why subagents?** Each `get_entities_for_lifting` batch returns source code that stays in the -conversation. After ~300 entities, the context fills up and the chat breaks. Subagents solve -this by giving each chunk its own fresh context window. +### Large scope (100+ entities): Delegate — DO NOT LIFT DIRECTLY + +A single conversation CANNOT lift a large repo in the foreground — each batch returns +~12K tokens of source code that stays in your context. 100 entities ≈ 10 batches ≈ +120K tokens burned on grunt work before you even start helping the user. + +**Delegate the loop to a sub-agent / cheaper model** using whatever dispatch mechanism +your runtime exposes. Feature extraction is pattern-matching (a small verb-object phrase +per entity) — a fast cheap model handles it fine. The delegated prompt should loop: + +``` +lifting_status -> get_entities_for_lifting(scope="*") -> analyze batch -> submit_lift_results +``` + +Repeat until DONE, then call `finalize_lifting`. Return to the caller. + +**Runtime-specific examples:** + +- **Claude Code**: `Task(subagent_type="general-purpose", model="haiku", prompt=...)` +- **Gemini CLI / Codex / Cursor / opencode / Windsurf**: use the equivalent sub-agent + or "dispatch to cheaper model" mechanism your client exposes. 
+ +If your runtime has no sub-agent mechanism at all, lift in the foreground but expect +context pressure — and consider telling the user to run `rpg-encoder lift --provider +anthropic` from their terminal instead (uses a direct API key, no agent subscription). + +After delegation returns: +- Call `lifting_status` to verify coverage. +- Call `get_files_for_synthesis` + `submit_file_syntheses`. +- Call `build_semantic_hierarchy` + `submit_hierarchy`. + +**You only lift directly in the foreground if the total is under 100 entities.** At +that size, context cost is manageable and dispatching is overhead. `lifting_status` +gives you an explicit NEXT STEP with the right guidance when delegation is warranted. ## ERROR RECOVERY diff --git a/crates/rpg-mcp/src/server.rs b/crates/rpg-mcp/src/server.rs index 4d56a37..dc5b24c 100644 --- a/crates/rpg-mcp/src/server.rs +++ b/crates/rpg-mcp/src/server.rs @@ -575,10 +575,34 @@ impl RpgServer { } } - // NEXT STEP — state machine guidance, staleness takes priority + // NEXT STEP — state machine guidance, staleness takes priority. + // `subagent_threshold` (100) is the entity count above which direct + // lifting will exhaust the caller's context — each batch returns + // ~12K tokens of source, so ~100 entities ≈ ~120K tokens burned. out.push('\n'); + let subagent_threshold = 100usize; + let remaining = total.saturating_sub(lifted); + if stale_detail.is_some() { out.push_str("NEXT STEP: Graph is stale. Call update_rpg to sync with code changes, then lift any new entities.\n"); + } else if remaining >= subagent_threshold { + // Large repo — recommend delegating to a sub-agent/cheaper model + // so the caller doesn't exhaust its own context on grunt work. + out.push_str(&format!( + "NEXT STEP: {} entities to lift. Each batch returns ~12K tokens of source — lifting this directly in your current conversation will exhaust context before completion.\n\n\ + Delegate this to a fresh sub-agent / cheaper model if your runtime supports it. The worker's prompt should loop:\n\n \ + lifting_status -> get_entities_for_lifting(scope=\"*\") -> analyze batch -> submit_lift_results\n\n \ + Repeat until DONE, then call finalize_lifting.\n\n\ + Example dispatch in Claude Code (substitute your runtime's equivalent):\n\n \ + Task(\n \ + description: \"Lift RPG\",\n \ + subagent_type: \"general-purpose\",\n \ + model: \"haiku\", // or \"sonnet\" for higher quality\n \ + prompt: \"\"\n \ + )\n\n\ + Gemini CLI, Codex, Cursor, opencode, Windsurf, and others have their own dispatch mechanisms — use whichever one sends the work to a fresh context window. If your runtime has no such mechanism, you can lift in the foreground but expect to run out of context partway through.\n", + remaining, + )); } else if lifted == 0 { out.push_str( "NEXT STEP: Call get_entities_for_lifting(scope=\"*\") to start lifting.\n", @@ -586,7 +610,7 @@ impl RpgServer { } else if lifted < total { out.push_str(&format!( "NEXT STEP: {} entities remaining. 
Call get_entities_for_lifting(scope=\"*\") to continue lifting.\n", - total - lifted, + remaining, )); } else if !graph.metadata.semantic_hierarchy { out.push_str( diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index 84e9d7e..8d76288 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -1156,6 +1156,24 @@ impl RpgServer { // Only include repo context and full instructions on batch 0 to save context space if batch_index == 0 { + // Size-aware dispatch warning — don't let the agent burn its context + // on 50 batches when a sub-agent or cheaper model would do the work. + const SUBAGENT_BATCH_THRESHOLD: usize = 10; + if total_batches >= SUBAGENT_BATCH_THRESHOLD { + output.push_str(&format!( + "\nNOTE: {} batches ahead (~{}K tokens of source total). Processing \ + this directly will exhaust your context partway through. If your \ + runtime supports sub-agent dispatch or a cheaper model, delegate \ + this work instead — abort this call and call lifting_status for \ + the recommended dispatch pattern. In Claude Code that's a Task \ + with model=\"haiku\"; in Gemini CLI / Codex / Cursor / opencode / \ + Windsurf, use the equivalent mechanism. Continue directly only if \ + no dispatch mechanism is available.\n\n", + total_batches, + total_batches * 12, + )); + } + if auto_lifted_count > 0 { output.push_str(&format!( "AUTO-LIFTED: {} trivial entities (getters/setters/constructors). Override by re-submitting features.\n\n", diff --git a/npm/package.json b/npm/package.json index 1b03f79..46d92cf 100644 --- a/npm/package.json +++ b/npm/package.json @@ -1,6 +1,6 @@ { "name": "rpg-encoder", - "version": "0.8.2", + "version": "0.8.3", "mcpName": "io.github.userFRM/rpg-encoder", "description": "RPG-Encoder — semantic code graph for AI-assisted code understanding", "license": "MIT", diff --git a/server.json b/server.json index 0b35c82..ce7ae53 100644 --- a/server.json +++ b/server.json @@ -6,12 +6,12 @@ "url": "https://github.com/userFRM/rpg-encoder", "source": "github" }, - "version": "0.8.2", + "version": "0.8.3", "packages": [ { "registryType": "npm", "identifier": "rpg-encoder", - "version": "0.8.2", + "version": "0.8.3", "transport": { "type": "stdio" } From 939e8e60ddf62a5a5df00a85c10c79d3a886efa9 Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 12:57:20 +0200 Subject: [PATCH 02/19] address Codex audit: single-line NEXT STEP, live config, runtime-neutral MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review of the original PR #87 flagged six issues. All addressed: HIGH — NEXT STEP must remain a single parseable line. Restored. Detail lives in labeled blocks below (LOOP:, DISPATCH:, FALLBACK:, FALLBACK:) so regex-based consumers keep working. MEDIUM — "~12K tokens per batch" was stale. Token budget is now read from self.config.encoding.max_batch_tokens (default 8000) at call time, and estimates scale correctly when the user overrides batch_tokens. MEDIUM — runtime-agnostic claim was overstated. Dropped Claude Code's Task() syntax from the core response. Callers use whatever sub-agent mechanism their runtime provides. Nothing in the response anchors on Anthropic/Haiku. MEDIUM — no-key fallback was a dead end. Now spells out two concrete fallbacks: scoped lifting (get_entities_for_lifting with a file glob) for callers with nothing at all, and CLI lift (rpg-encoder lift --provider ...) for callers with an API key. LOW — handshake prompt grew. 
server_instructions.md Large-scope section rewritten and trimmed from +105 words to +22 words over the pre-PR baseline. LOW — README didn't mention delegation. Added a short paragraph explaining the 100+ entity path and the CLI autonomous lift. NIT — magic numbers 100 and 10 were duplicated across server.rs and tools.rs. Moved to crate-level LARGE_SCOPE_ENTITIES and LARGE_SCOPE_BATCHES constants in main.rs. All 651 workspace tests still pass. fmt/clippy clean. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 50 +++++++++++++------ Cargo.lock | 14 +++--- README.md | 2 + crates/rpg-mcp/src/main.rs | 13 +++++ .../src/prompts/server_instructions.md | 43 +++++++--------- crates/rpg-mcp/src/server.rs | 41 +++++++-------- crates/rpg-mcp/src/tools.rs | 23 ++++----- 7 files changed, 104 insertions(+), 82 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c3ed2b..d008886 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,22 +10,40 @@ and this project adheres to [Semantic Versioning](https://semver.org/). ### Changed - **`lifting_status` NEXT STEP now recommends sub-agent dispatch when the - remaining entity count is ≥100.** The previous guidance was to call - `get_entities_for_lifting(scope="*")` directly in the foreground regardless - of size — which silently burns ~12K tokens per batch of the caller's - context window. On a repo with 1500 unlifted entities that's ~150 batches - = ~1.8M tokens of grunt work before any real help begins. -- **`get_entities_for_lifting` batch-0 response now includes a dispatch note - when 10+ batches are queued**, in case the caller skipped `lifting_status` - and jumped straight to pulling batches. -- **Guidance is runtime-agnostic.** Mentions Claude Code's `Task(model="haiku")` - as one example but calls out Gemini CLI, Codex, Cursor, opencode, and - Windsurf as needing their own equivalent dispatch mechanism. Falls back to - `rpg-encoder lift --provider anthropic` (CLI, uses external API key) when - the runtime has no sub-agent concept at all. -- `server_instructions.md` large-scope guidance simplified from "parallel - subagents per area" to "one sub-agent drains it" — same result, simpler - caller reasoning. + remaining entity count is ≥`LARGE_SCOPE_ENTITIES` (100).** Previously the + guidance was to call `get_entities_for_lifting(scope="*")` directly in + the foreground regardless of size, which silently consumed the caller's + context window — on a 1500-entity repo, ~150 batches of source code + before any real user work could begin. +- **`NEXT STEP:` is still a single parseable line.** Dispatch detail is + emitted in labeled blocks (`LOOP:`, `DISPATCH:`, `FALLBACK:`) immediately + below, so existing line-based consumers continue to work. +- **Batch-size estimates are read from the live `max_batch_tokens` config** + (default 8000) instead of the previous hard-coded `~12K` figure. The + total-token estimate scales correctly when the user overrides the budget. +- **Guidance is runtime-neutral.** No specific runtime's dispatch syntax + appears in the core response — callers use whatever sub-agent or + cheaper-model mechanism their runtime exposes. Two fallbacks are listed + explicitly: scoped lifting (`get_entities_for_lifting(scope="src/area/**")`) + for callers with no delegation mechanism and no API key, and + `rpg-encoder lift --provider anthropic|openai` for callers with an API key + and no sub-agent support. 
+- **`get_entities_for_lifting` batch-0 response** includes a one-line + dispatch hint when ≥`LARGE_SCOPE_BATCHES` (10) batches are queued, + directing the caller back to `lifting_status` for the full delegation + recommendation. Kept deliberately short so it doesn't duplicate detail. +- `server_instructions.md` large-scope section rewritten and shortened — + net prompt growth ≤30 tokens over pre-PR baseline. +- New workspace-visible constants `LARGE_SCOPE_ENTITIES` and + `LARGE_SCOPE_BATCHES` in `rpg-mcp` replace duplicated magic numbers + across server.rs and tools.rs. + +### Documentation + +- README delegation guidance: added a short paragraph explaining the + large-repo path (sub-agent dispatch or CLI autonomous lift) so users + aren't surprised when `lifting_status` starts returning delegation + recommendations instead of direct lift instructions. ## [0.8.2] - 2026-04-14 diff --git a/Cargo.lock b/Cargo.lock index dff9862..3e069de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2679,7 +2679,7 @@ dependencies = [ [[package]] name = "rpg-cli" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "chrono", @@ -2700,7 +2700,7 @@ dependencies = [ [[package]] name = "rpg-core" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "chrono", @@ -2716,7 +2716,7 @@ dependencies = [ [[package]] name = "rpg-encoder" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "chrono", @@ -2736,7 +2736,7 @@ dependencies = [ [[package]] name = "rpg-lift" -version = "0.8.2" +version = "0.8.3" dependencies = [ "globset", "indicatif 0.18.3", @@ -2753,7 +2753,7 @@ dependencies = [ [[package]] name = "rpg-mcp" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "globset", @@ -2774,7 +2774,7 @@ dependencies = [ [[package]] name = "rpg-nav" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "criterion", @@ -2793,7 +2793,7 @@ dependencies = [ [[package]] name = "rpg-parser" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "criterion", diff --git a/README.md b/README.md index 38c6ef0..1211f63 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,8 @@ Then open any repo and tell your agent: Your agent handles everything: indexes entities (seconds), reads each function and adds intent-level features (a few minutes), organizes them into a semantic hierarchy, and commits `.rpg/graph.json` for your team. +For repos with ~100+ entities, `lifting_status` will tell your agent to delegate the lifting loop to a sub-agent or a cheaper model — feature extraction is pattern-matching, not novel reasoning. If your runtime has no sub-agent mechanism, run `rpg-encoder lift --provider anthropic|openai` from the terminal with an API key — the CLI drives an external LLM directly with no agent involvement. + Once lifted, try: - *"What handles authentication?"* — finds code even when nothing is named "auth" diff --git a/crates/rpg-mcp/src/main.rs b/crates/rpg-mcp/src/main.rs index 1ee462b..705e28b 100644 --- a/crates/rpg-mcp/src/main.rs +++ b/crates/rpg-mcp/src/main.rs @@ -9,6 +9,19 @@ mod server; mod tools; mod types; +/// Entity count above which direct foreground lifting is discouraged. +/// Caller-side context cost grows linearly with entity count × batch token +/// budget; past this threshold the caller should delegate to a sub-agent +/// or cheaper model rather than loop locally. +pub(crate) const LARGE_SCOPE_ENTITIES: usize = 100; + +/// Batch count above which the batch-0 response includes a dispatch note. 
+/// Derived from `LARGE_SCOPE_ENTITIES` assuming ~10 entities per token-aware +/// batch at default config (batch_size=25, max_batch_tokens=8000). Kept as a +/// separate constant because the auto-lifter shrinks the LLM-needed set +/// before batching, so the ratio is conservative. +pub(crate) const LARGE_SCOPE_BATCHES: usize = 10; + use anyhow::Result; use rmcp::ServiceExt; use rpg_core::storage; diff --git a/crates/rpg-mcp/src/prompts/server_instructions.md b/crates/rpg-mcp/src/prompts/server_instructions.md index 1674a96..a71ee60 100644 --- a/crates/rpg-mcp/src/prompts/server_instructions.md +++ b/crates/rpg-mcp/src/prompts/server_instructions.md @@ -82,40 +82,33 @@ Process all batches in the current conversation: - Call `finalize_lifting` then `get_files_for_synthesis` + `submit_file_syntheses`. - Call `build_semantic_hierarchy` + `submit_hierarchy`. -### Large scope (100+ entities): Delegate — DO NOT LIFT DIRECTLY +### Large scope (100+ entities): Delegate, do not lift directly -A single conversation CANNOT lift a large repo in the foreground — each batch returns -~12K tokens of source code that stays in your context. 100 entities ≈ 10 batches ≈ -120K tokens burned on grunt work before you even start helping the user. +Each batch returns a large chunk of source code that stays in your context. At the +default config ~10 batches is already ~80K tokens burned on grunt work. Feature +extraction is pattern-matching — a cheaper or delegated model handles it fine. -**Delegate the loop to a sub-agent / cheaper model** using whatever dispatch mechanism -your runtime exposes. Feature extraction is pattern-matching (a small verb-object phrase -per entity) — a fast cheap model handles it fine. The delegated prompt should loop: +Delegated worker loop (run in a fresh context): ``` -lifting_status -> get_entities_for_lifting(scope="*") -> analyze batch -> submit_lift_results +get_entities_for_lifting(scope="*") -> analyze -> submit_lift_results (repeat) +finalize_lifting ``` -Repeat until DONE, then call `finalize_lifting`. Return to the caller. +Use whatever sub-agent or cheaper-model mechanism your runtime exposes. The graph +persists to disk, so the worker's tool calls update the same state the caller reads. -**Runtime-specific examples:** +Fallbacks when no delegation mechanism is available: +- **Scoped lifting**: narrow each call, e.g. `get_entities_for_lifting(scope="src/auth/**")`, + then `finalize_lifting`. Each scope fits in foreground context. +- **CLI autonomous lift**: `rpg-encoder lift --provider anthropic|openai` uses an + external API key directly — no agent subscription involvement. -- **Claude Code**: `Task(subagent_type="general-purpose", model="haiku", prompt=...)` -- **Gemini CLI / Codex / Cursor / opencode / Windsurf**: use the equivalent sub-agent - or "dispatch to cheaper model" mechanism your client exposes. +After delegation returns, call `get_files_for_synthesis` + `submit_file_syntheses`, +then `build_semantic_hierarchy` + `submit_hierarchy`. -If your runtime has no sub-agent mechanism at all, lift in the foreground but expect -context pressure — and consider telling the user to run `rpg-encoder lift --provider -anthropic` from their terminal instead (uses a direct API key, no agent subscription). - -After delegation returns: -- Call `lifting_status` to verify coverage. -- Call `get_files_for_synthesis` + `submit_file_syntheses`. -- Call `build_semantic_hierarchy` + `submit_hierarchy`. 
- -**You only lift directly in the foreground if the total is under 100 entities.** At -that size, context cost is manageable and dispatching is overhead. `lifting_status` -gives you an explicit NEXT STEP with the right guidance when delegation is warranted. +Call `lifting_status` whenever you need the NEXT STEP with a concrete recommendation +for the current state. ## ERROR RECOVERY diff --git a/crates/rpg-mcp/src/server.rs b/crates/rpg-mcp/src/server.rs index dc5b24c..6a378d8 100644 --- a/crates/rpg-mcp/src/server.rs +++ b/crates/rpg-mcp/src/server.rs @@ -576,33 +576,34 @@ impl RpgServer { } // NEXT STEP — state machine guidance, staleness takes priority. - // `subagent_threshold` (100) is the entity count above which direct - // lifting will exhaust the caller's context — each batch returns - // ~12K tokens of source, so ~100 entities ≈ ~120K tokens burned. + // `LARGE_SCOPE_ENTITIES` is the threshold above which direct + // foreground lifting is not recommended. See also the matching + // check in `get_entities_for_lifting` which expresses the same + // heuristic in terms of batches. out.push('\n'); - let subagent_threshold = 100usize; let remaining = total.saturating_sub(lifted); if stale_detail.is_some() { out.push_str("NEXT STEP: Graph is stale. Call update_rpg to sync with code changes, then lift any new entities.\n"); - } else if remaining >= subagent_threshold { - // Large repo — recommend delegating to a sub-agent/cheaper model - // so the caller doesn't exhaust its own context on grunt work. + } else if remaining >= crate::LARGE_SCOPE_ENTITIES { + // Large repo — recommend delegating the mechanical loop so the + // caller doesn't exhaust its own context. Keep NEXT STEP on one + // line; follow-up detail is in labeled blocks below. + let batch_tokens = self.config.read().await.encoding.max_batch_tokens; out.push_str(&format!( - "NEXT STEP: {} entities to lift. Each batch returns ~12K tokens of source — lifting this directly in your current conversation will exhaust context before completion.\n\n\ - Delegate this to a fresh sub-agent / cheaper model if your runtime supports it. The worker's prompt should loop:\n\n \ - lifting_status -> get_entities_for_lifting(scope=\"*\") -> analyze batch -> submit_lift_results\n\n \ - Repeat until DONE, then call finalize_lifting.\n\n\ - Example dispatch in Claude Code (substitute your runtime's equivalent):\n\n \ - Task(\n \ - description: \"Lift RPG\",\n \ - subagent_type: \"general-purpose\",\n \ - model: \"haiku\", // or \"sonnet\" for higher quality\n \ - prompt: \"\"\n \ - )\n\n\ - Gemini CLI, Codex, Cursor, opencode, Windsurf, and others have their own dispatch mechanisms — use whichever one sends the work to a fresh context window. If your runtime has no such mechanism, you can lift in the foreground but expect to run out of context partway through.\n", - remaining, + "NEXT STEP: Delegate lifting to a sub-agent or cheaper model — {} entities remain, each batch returns ~{}K tokens of source.\n", + remaining, batch_tokens.div_ceil(1000), )); + out.push_str( + "\nLOOP (run in the delegated context):\n \ + get_entities_for_lifting(scope=\"*\") -> analyze batch -> submit_lift_results -> repeat until DONE -> finalize_lifting\n\ + \nDISPATCH:\n \ + Use whatever sub-agent / cheaper-model mechanism your runtime provides. The MCP graph is persisted to disk, so the worker's tool calls update the same state the caller reads.\n\ + \nFALLBACK (no sub-agent mechanism, no API key):\n \ + Scope the lift to one subtree at a time — e.g. 
get_entities_for_lifting(scope=\"src/auth/**\") — and call finalize_lifting after each. Each scoped batch fits in foreground context.\n\ + \nFALLBACK (no sub-agent mechanism, API key available):\n \ + Run `rpg-encoder lift --provider anthropic` (or `openai`) from the terminal — the CLI drives an external LLM directly with no agent involvement.\n", + ); } else if lifted == 0 { out.push_str( "NEXT STEP: Call get_entities_for_lifting(scope=\"*\") to start lifting.\n", diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index 8d76288..8d383c2 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -1156,21 +1156,16 @@ impl RpgServer { // Only include repo context and full instructions on batch 0 to save context space if batch_index == 0 { - // Size-aware dispatch warning — don't let the agent burn its context - // on 50 batches when a sub-agent or cheaper model would do the work. - const SUBAGENT_BATCH_THRESHOLD: usize = 10; - if total_batches >= SUBAGENT_BATCH_THRESHOLD { + // Size-aware dispatch hint — if the queue is large, point the + // caller at `lifting_status` where the full dispatch guidance + // lives (kept there to avoid duplicating detail in the per-batch + // response, which ships with every batch's source payload). + if total_batches >= crate::LARGE_SCOPE_BATCHES { + let batch_tokens = self.config.read().await.encoding.max_batch_tokens; + let approx_total_k = (total_batches * batch_tokens).div_ceil(1000); output.push_str(&format!( - "\nNOTE: {} batches ahead (~{}K tokens of source total). Processing \ - this directly will exhaust your context partway through. If your \ - runtime supports sub-agent dispatch or a cheaper model, delegate \ - this work instead — abort this call and call lifting_status for \ - the recommended dispatch pattern. In Claude Code that's a Task \ - with model=\"haiku\"; in Gemini CLI / Codex / Cursor / opencode / \ - Windsurf, use the equivalent mechanism. Continue directly only if \ - no dispatch mechanism is available.\n\n", - total_batches, - total_batches * 12, + "\nNOTE: {} batches queued (~{}K tokens of source total). If your runtime supports sub-agent dispatch or a cheaper model, abort this call and invoke `lifting_status` for the delegation pattern. Continue here only if no dispatch is available.\n\n", + total_batches, approx_total_k, )); } From c3eefa66ebea55939365eed6a6b2fbbdf3c1b183 Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 13:42:13 +0200 Subject: [PATCH 03/19] =?UTF-8?q?docs:=20"RPG=20first"=20=E2=80=94=20count?= =?UTF-8?q?er=20the=20grep/cat/Read=20reflex=20across=20every=20surface?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agents consistently reached for grep/cat/Read on codebase questions even when RPG was built, lifted, and fully indexed — because the server's prompt and tool descriptions never actively countered the training default. Users had to manually remind every session "why don't you use the RPG?". Fix is a consistent directive across every surface the agent reads: server_instructions.md: New top section "USE RPG FIRST — BEFORE grep / cat / Read / find" with a concrete mapping table (12 rows). Placed before LIFTING FLOW so the directive is read before any workflow detail. Tool descriptions (12 tools): Each of search_node, fetch_node, explore_rpg, rpg_info, semantic_snapshot, context_pack, impact_radius, plan_change, analyze_health, detect_cycles, find_paths, slice_between now opens with a "PREFER THIS OVER ..." 
marker naming the shell command or workflow it replaces. Tool descriptions are what agents weigh when choosing which tool to invoke — making the displacement explicit at that layer is what actually sticks. .claude/skills/rpg/SKILL.md: Same mapping table scoped to the CLI surface. Explicit hint to prefer MCP tools when both are available. README: Dedicated "Use RPG before grep/cat/find" section placed above "How It Works". Same mapping as the server prompt so human readers see the same positioning the agent does. No tests affected. All 651 pass. fmt/clippy clean. Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/skills/rpg/SKILL.md | 31 +++++++++++++++-- CHANGELOG.md | 23 +++++++++++++ README.md | 23 +++++++++++++ .../src/prompts/server_instructions.md | 34 +++++++++++++++++++ crates/rpg-mcp/src/tools.rs | 24 ++++++------- 5 files changed, 121 insertions(+), 14 deletions(-) diff --git a/.claude/skills/rpg/SKILL.md b/.claude/skills/rpg/SKILL.md index 760d7c8..01c31df 100644 --- a/.claude/skills/rpg/SKILL.md +++ b/.claude/skills/rpg/SKILL.md @@ -1,10 +1,37 @@ --- name: rpg -description: 'Build and query semantic code graphs using RPG-Encoder. Use when the user wants to understand codebase structure, search for code by intent, explore dependencies, analyze change impact, or perform semantic lifting.' +description: 'Build and query semantic code graphs using RPG-Encoder. Use BEFORE grep/cat/find for any question about code structure, behavior, relationships, impact, dependencies, or cross-file patterns.' allowed-tools: Bash(rpg-encoder *), Read, Glob, Grep --- -# RPG-Encoder CLI Skill +# RPG-Encoder — USE BEFORE grep/cat/find + +## Reach for RPG first, shell commands second + +Whenever the user asks about code structure, behavior, relationships, impact, +dependencies, or cross-file patterns, reach for RPG commands BEFORE falling +back to `grep`, `cat`, `find`, `wc`, or `Read`. RPG is indexed, semantically +organized, and answers structural questions in one call that would otherwise +require dozens of chained text searches. + +| If you'd otherwise... | Use this instead | +|---|---| +| `grep -r` by intent | `rpg-encoder search "describe what it does"` | +| `grep -r` by name | `rpg-encoder search "name" --mode snippets` | +| `cat file` for a function | `rpg-encoder fetch "file:func"` | +| chained greps for callers | `rpg-encoder explore ENTITY --direction up` | +| chained greps for callees | `rpg-encoder explore ENTITY --direction down` | +| `wc -l` / `find` / `tree` | `rpg-encoder info` | +| reading many files | Use the MCP `semantic_snapshot` tool if available | + +Fall back to `grep` / `cat` / `Read` only when the query is about literal text +(string search, comments, TODOs, log messages) — not structure or semantics. + +If you have the RPG MCP server connected, prefer its tools (`search_node`, +`fetch_node`, `explore_rpg`, `impact_radius`, `plan_change`, `semantic_snapshot`, +`context_pack`) over the CLI — they're faster and return structured data. + +--- You have access to `rpg-encoder`, a CLI tool that builds semantic code graphs (Repository Planning Graphs) from any codebase. Use it to understand code structure, search by intent, trace dependencies, and perform autonomous semantic lifting. diff --git a/CHANGELOG.md b/CHANGELOG.md index d008886..4d2f276 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/). 
aren't surprised when `lifting_status` starts returning delegation recommendations instead of direct lift instructions. +### Tool-preference guidance (RPG first, grep second) + +Agents consistently fall back to `grep`/`cat`/`Read` for codebase questions +even when RPG is installed, because the server's prompt and tool descriptions +didn't actively counter the training default. + +- **`server_instructions.md`**: new top section "USE RPG FIRST — BEFORE + grep / cat / Read / find" with a concrete mapping table listing each + shell pattern and the RPG tool that replaces it. Placed before the + LIFTING FLOW so the directive is read before any workflow detail. +- **Tool descriptions**: each of `search_node`, `fetch_node`, `explore_rpg`, + `rpg_info`, `semantic_snapshot`, `context_pack`, `impact_radius`, + `plan_change`, `analyze_health`, `detect_cycles`, `find_paths`, + `slice_between` now opens with a "PREFER THIS OVER ..." marker naming + the shell command or workflow it replaces. Description tokens are + what agents weigh when choosing a tool — making the displacement + explicit is the intervention that sticks. +- **`.claude/skills/rpg/SKILL.md`**: same mapping table scoped to the + CLI surface, plus a note pointing to MCP tools when available. +- **README**: dedicated "Use RPG before grep/cat/find" section with + the same mapping table as the server prompt — so human readers see + the same positioning the agent does. + ## [0.8.2] - 2026-04-14 ### Added diff --git a/README.md b/README.md index 1211f63..a1de686 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,29 @@ Once lifted, try: --- +## Use RPG before `grep`, `cat`, `find` + +The server instructions tell your agent to reach for RPG tools FIRST for any +question about code structure or behavior. That reflex matters — `grep`/`cat`/ +`Read` loops burn tokens and miss semantic relationships RPG already knows. + +| If you'd otherwise reach for... | Use this instead | +|---|---| +| `grep -r` / `rg` (by intent) | `search_node(query="...")` | +| `grep -r` / `rg` (by name) | `search_node(query="...", mode="snippets")` | +| `cat` / `Read` a function | `fetch_node(entity_id="file:name")` | +| chained greps for callers/callees | `explore_rpg(entity_id="...", direction="...")` | +| recursive grep for "what depends on X" | `impact_radius(entity_id="...")` | +| `wc -l` / `find` / `tree` | `rpg_info` | +| reading many files for context | `semantic_snapshot` | +| manual search → fetch → explore chains | `context_pack(query="...")` | +| "how do I refactor X safely" | `plan_change(goal="...")` | + +Fall back to `grep` / `cat` / `Read` only when the query is about literal text +(string search, comments, TODOs, log messages) — not about structure. + +--- + ## How It Works

diff --git a/crates/rpg-mcp/src/prompts/server_instructions.md b/crates/rpg-mcp/src/prompts/server_instructions.md index a71ee60..313e162 100644 --- a/crates/rpg-mcp/src/prompts/server_instructions.md +++ b/crates/rpg-mcp/src/prompts/server_instructions.md @@ -1,6 +1,40 @@ RPG-Encoder: Repository Planning Graph — semantic understanding of any codebase. No API keys or local LLMs needed. YOU are the LLM — you analyze the code directly. +## USE RPG FIRST — BEFORE grep / cat / Read / find + +Any user question about code structure, behavior, relationships, impact, +dependencies, or cross-file patterns — reach for RPG tools BEFORE falling back +to shell commands or file reads. RPG is indexed, semantically organized, and +gives one-call answers to questions that would otherwise require dozens of +chained greps. + +| If you'd otherwise reach for... | Use this instead | +|---|---| +| `grep -r` / `rg` (by intent) | `search_node(query="...")` — finds code by what it DOES | +| `grep -r` / `rg` (by name/path) | `search_node(query="...", mode="snippets")` | +| `cat file` / Read a function | `fetch_node(entity_id="file:name")` | +| chained greps for "what calls X" | `explore_rpg(entity_id="...", direction="upstream")` | +| recursive grep for "what depends on X" | `impact_radius(entity_id="...")` — with edge paths | +| `wc -l` / `find` / `tree` | `rpg_info` — counts, hierarchy, inter-area connectivity | +| reading the whole repo | `semantic_snapshot` — whole-repo view in one call | +| multi-step search + read + trace | `context_pack(query="...")` — 1 call, 44% fewer tokens | +| "how do I refactor X safely" | `plan_change(goal="...")` — ordered entities + blast radius | +| "find circular dependencies" | `detect_cycles` | +| "find god objects / unstable code" | `analyze_health` | +| "shortest path between A and B" | `find_paths(source, target)` | +| "minimal subgraph connecting these" | `slice_between(entity_ids=[...])` | + +**Fall back to grep / cat / Read only when the query is about LITERAL TEXT** +(string search, comments, TODO markers, log messages, license headers) — not +about structure or semantics. This holds even if your training predisposes you +toward shell tools; the RPG is cheaper, more accurate, and more complete for +every structural question. + +If a graph does not exist yet (`rpg_info` says "No RPG found"), run `build_rpg` +first. If entities are unlifted and the scope is large, see the LIFTING FLOW +below for delegation guidance. + ## LIFTING FLOW (step by step) 1. `build_rpg` — index the codebase (if no graph exists) diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index 8d383c2..64e633c 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -13,7 +13,7 @@ use crate::types::*; #[tool_router] impl RpgServer { #[tool( - description = "Search for code entities by intent or keywords. Returns entities with file paths, line numbers, and relevance scores. Use mode='features' for semantic intent search (use behavioral/functional phrases as query), 'snippets' for name/path matching (use file paths, qualified entities, or keywords as query), 'auto' (default) tries both.", + description = "PREFER THIS OVER grep/rg FOR ANY QUESTION ABOUT CODE BEHAVIOR OR NAMES. Search for code entities by intent or keywords. Returns entities with file paths, line numbers, and relevance scores. Use mode='features' for semantic intent search (e.g., 'validate user input') — finds code by what it DOES even when names don't match. 
Use mode='snippets' for name/path matching (e.g., 'FilterGroupManager' or 'src/auth/'). Use mode='auto' (default) to try both. This replaces grep/rg for every structural query.", annotations(read_only_hint = true, open_world_hint = false) )] async fn search_node( @@ -156,7 +156,7 @@ impl RpgServer { } #[tool( - description = "Fetch detailed metadata and source code for a known entity. Returns the entity's semantic features, dependencies (what it calls, what calls it), hierarchy position, and full source code.", + description = "PREFER THIS OVER cat/Read FOR A SINGLE ENTITY. Fetch detailed metadata and source code for a known entity by ID. Returns the entity's semantic features (what it does), dependencies (what it calls, what calls it), hierarchy position, and full source code. Use this instead of reading the whole file when you only need one function/class/method.", annotations(read_only_hint = true, open_world_hint = false) )] async fn fetch_node( @@ -194,7 +194,7 @@ impl RpgServer { } #[tool( - description = "Explore the dependency graph starting from an entity. Traverses import, invocation, inheritance, composition, render, state-read/state-write, and dispatch edges. Use direction='downstream' to see what the entity calls, 'upstream' to see what calls it, 'both' for full picture.", + description = "PREFER THIS OVER CHAINED GREPS FOR DEPENDENCY QUESTIONS. Explore the dependency graph starting from an entity. Traverses import, invocation, inheritance, composition, render, state-read/state-write, and dispatch edges. Use direction='downstream' to see what the entity calls, 'upstream' to see what calls it, 'both' for full picture. Replaces the manual \"grep for X, then grep each result, then grep those\" loop with one graph walk.", annotations(read_only_hint = true, open_world_hint = false) )] async fn explore_rpg( @@ -279,7 +279,7 @@ impl RpgServer { } #[tool( - description = "Get RPG statistics: entity count, file count, functional areas, dependency edges, containment edges, and hierarchy overview. Use this first to understand the codebase structure before searching.", + description = "PREFER THIS OVER wc/find/tree FOR CODEBASE OVERVIEW. RPG statistics: entity count, file count, functional areas, dependency edges, containment edges, inter-area connectivity, hierarchy overview. Call this first on any new codebase to orient yourself before searching.", annotations(read_only_hint = true, open_world_hint = false) )] async fn rpg_info(&self) -> Result { @@ -324,7 +324,7 @@ impl RpgServer { } #[tool( - description = "Generate a compact, token-efficient snapshot of the entire repository's semantic understanding. Designed for context window injection at session start. Includes: full hierarchy with aggregate features, all entities grouped by functional area with semantic features, condensed dependency skeleton, and coverage stats. Target: ~25-30K tokens for a 1000-entity codebase. Call this FIRST in any session to gain whole-repo awareness before using other tools.", + description = "PREFER THIS OVER READING MANY FILES FOR WHOLE-REPO CONTEXT. Compact, token-efficient snapshot of the entire repository's semantic understanding: full hierarchy with aggregate features, all entities grouped by functional area with semantic features, condensed dependency skeleton, and coverage stats. Target: ~25-30K tokens for a 1000-entity codebase. 
Call this at session start to gain whole-repo awareness in a single tool call — then use search_node/fetch_node for drill-down.", annotations(read_only_hint = true, open_world_hint = false) )] async fn semantic_snapshot( @@ -2596,7 +2596,7 @@ impl RpgServer { } #[tool( - description = "Build a focused context pack in a single call. Searches for entities matching your query, fetches their details and source code, expands neighbors to the specified depth (default 1), and trims to a token budget. Replaces the typical search→fetch→explore multi-step workflow. Returns primary entities with source + features + deps, plus neighborhood entities for broader context.", + description = "PREFER THIS OVER MANUAL search → fetch → explore CHAINS. Single-call context pack: searches for entities matching your query, fetches their details and source code, expands neighbors to the specified depth (default 1), and trims to a token budget. Returns primary entities with source + features + deps, plus neighborhood entities for broader context. Replaces 3-5 chained tool calls with 1, ~44% fewer tokens.", annotations(read_only_hint = true, open_world_hint = false) )] async fn context_pack( @@ -2649,7 +2649,7 @@ impl RpgServer { } #[tool( - description = "Compute the impact radius of an entity: find all entities reachable via dependency edges with edge paths. Use direction='upstream' to answer 'what depends on this?', 'downstream' for 'what does this depend on?'. Returns a flat list with depth, edge paths, and features — ideal for change impact analysis." + description = "PREFER THIS OVER RECURSIVE GREP FOR \"WHAT BREAKS IF I CHANGE X\". Computes the impact radius of an entity: all entities reachable via dependency edges with edge paths and depth. Use direction='upstream' for 'what depends on this?', 'downstream' for 'what does this depend on?'. Returns a flat list with depth, edge paths, and features — one call replaces a dependency trace you'd otherwise grep manually." )] async fn impact_radius( &self, @@ -2694,7 +2694,7 @@ impl RpgServer { } #[tool( - description = "Find multiple dependency paths between two entities (returns up to max_paths results of equal shortest length). Returns paths with entity IDs and edge kinds." + description = "PREFER THIS OVER MANUALLY TRACING CALLS. Finds shortest dependency paths between two entities (returns up to max_paths results of equal shortest length). Answers 'how does A reach B?' or 'is there any call chain from module X to module Y?' with entity IDs and edge kinds. Replaces the grep-follow-grep chain you'd otherwise walk by hand." )] async fn find_paths( &self, @@ -2773,7 +2773,7 @@ impl RpgServer { } #[tool( - description = "Extract minimal connecting subgraph between a set of entities. Returns entities and edges on shortest paths connecting the specified entities." + description = "PREFER THIS FOR MULTI-ENTITY SLICE ANALYSIS. Extracts the minimal connecting subgraph between a set of entities — returns entities and edges on shortest paths connecting them. Useful for 'show me just the code that connects A, B, and C' without dragging in the whole graph." )] async fn slice_between( &self, @@ -2832,7 +2832,7 @@ impl RpgServer { } #[tool( - description = "Plan code changes: find relevant entities, compute modification order, assess impact radius. Returns dependency-ordered entity list with blast radius analysis.", + description = "PREFER THIS BEFORE ANY REFACTOR OR CROSS-FILE EDIT. 
Plans code changes: finds relevant entities by intent, computes dependency-safe modification order, assesses impact radius per entity. Returns an ordered list of entities to touch with blast radius analysis so you know the minimal safe change set before you start editing.", annotations(read_only_hint = true, open_world_hint = false) )] async fn plan_change( @@ -3220,7 +3220,7 @@ impl RpgServer { } #[tool( - description = "Analyze code health metrics including coupling, instability, centrality, and potential god objects. Returns entities with architectural issues and recommendations for refactoring. Set include_duplication=true to detect code clones via Rabin-Karp fingerprinting (reads source files, slower). Set include_semantic_duplication=true to detect conceptual duplicates via Jaccard similarity on lifted features (in-memory, fast; requires entities to be lifted).", + description = "PREFER THIS OVER EYEBALLING FOR ARCHITECTURAL SMELLS. Analyzes code health: coupling, instability, centrality, god object detection, optional clone detection. Returns entities with architectural issues and refactoring recommendations. Use `include_duplication=true` for token-level Rabin-Karp clones (reads source, slower). Use `include_semantic_duplication=true` for Jaccard-similarity conceptual duplicates on lifted features (in-memory, fast). Replaces manual review of cross-file patterns.", annotations(read_only_hint = true, open_world_hint = false) )] async fn analyze_health( @@ -3255,7 +3255,7 @@ impl RpgServer { } #[tool( - description = "Detect circular dependencies (cycles) in the codebase. Cycles are architectural smells where A depends on B, B on C, and C back on A. Returns all detected cycles with their entity chains. First call returns summary + recommendations. Use parameters to filter results.", + description = "PREFER THIS OVER MANUAL CYCLE HUNTING. Detects circular dependencies: A→B→C→A chains anywhere in the graph. Returns cycles with entity chains, file counts, and cross-area filtering. First call returns summary + area breakdown; pass filters (area, min_cycle_length, cross_file_only) to get specific cycles. One call replaces hours of import-chain reading.", annotations(read_only_hint = true, open_world_hint = false) )] async fn detect_cycles( From 1b045b8f2caf2e26e53f45b18178a5d938543cef Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 14:19:35 +0200 Subject: [PATCH 04/19] address Opus audit (H1 + 3 LOW): fabricated metric, runtime-neutral phrasing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Opus audit of PR #87 found one HIGH issue and several LOW polish items. All addressed: H1 — "~44% fewer tokens" claim was unsubstantiated (no benchmark, no data). I had escalated a long-buried server-prompt phrase into the context_pack tool description and the new mapping tables. Removed across all surfaces: - tools.rs context_pack description - server_instructions.md mapping table row - server_instructions.md token-saving tips block This is the exact failure mode MEMORY.md's "NEVER ship perf code without benchmarks" rule prohibits — caught only because it was being shipped to every runtime via the description field. M1 — CHANGELOG claim "≤30 tokens prompt growth" was misleading. The figure described only the LIFTING FLOW sub-section; the new "USE RPG FIRST" section adds ~500 tokens. Reworded to scope the ≤30 to the right sub-section and acknowledge the deliberate +500 from the new top section. 
M2 — "abort this call" wording was misleading because the batch source payload has already been delivered by the time the agent reads the NOTE. Changed to "stop here — do not request batches 2..N" so the instruction matches what's actually possible. L1 — .gemini/extensions/rpg/CONTEXT.md got the same "use RPG first / fall back to grep only for literal text" guidance. Three other surfaces already had it; Gemini was the gap. L3 — Capitalized "Read" in runtime-neutral surfaces (server_instructions.md, README, tools.rs fetch_node) pattern-matched to Claude Code's Read tool. Replaced with "file reads" / "reading a function" / "cat" so non-Claude runtimes don't infer a tool name. Capital-R left intact in SKILL.md (correctly Claude-scoped). Skipped: M3 (test coverage for new dispatch branches — pre-existing gap, follow-up), L2 (rpg_info ↔ tree mapping is a slight stretch but defensible), L4 (reconstruct_plan/update_rpg correctly omitted), N1-N3 (stylistic). 651 tests still pass. fmt/clippy clean. Co-Authored-By: Claude Opus 4.6 (1M context) --- .gemini/extensions/rpg/CONTEXT.md | 13 +++++++++++++ CHANGELOG.md | 6 ++++-- README.md | 9 +++++---- crates/rpg-mcp/src/prompts/server_instructions.md | 10 +++++----- crates/rpg-mcp/src/tools.rs | 6 +++--- 5 files changed, 30 insertions(+), 14 deletions(-) diff --git a/.gemini/extensions/rpg/CONTEXT.md b/.gemini/extensions/rpg/CONTEXT.md index e661f1e..4891809 100644 --- a/.gemini/extensions/rpg/CONTEXT.md +++ b/.gemini/extensions/rpg/CONTEXT.md @@ -2,6 +2,19 @@ RPG-Encoder builds semantic code graphs (Repository Planning Graphs) for AI-assisted code understanding. +## Use RPG before grep / cat / find + +For any question about code structure, behavior, relationships, impact, or +cross-file patterns, reach for RPG tools (MCP or CLI) before shell commands. +RPG answers structural questions in one call that would otherwise require +dozens of chained text searches. + +Fall back to grep / cat / file reads only when the query is about literal +text (string search, comments, TODOs, log messages). + +See the MCP server instructions for the full mapping of shell patterns to +RPG tools — it's loaded automatically when the extension is active. + ## CLI Commands - `rpg-encoder build` — Index codebase, build graph (run once) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d2f276..e6986d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,8 +32,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/). dispatch hint when ≥`LARGE_SCOPE_BATCHES` (10) batches are queued, directing the caller back to `lifting_status` for the full delegation recommendation. Kept deliberately short so it doesn't duplicate detail. -- `server_instructions.md` large-scope section rewritten and shortened — - net prompt growth ≤30 tokens over pre-PR baseline. +- `server_instructions.md` LIFTING FLOW sub-section rewritten and shortened + (net ≤30 tokens over pre-PR baseline for that sub-section). Note: the + separate "USE RPG FIRST" section below adds ~500 tokens to the handshake + prompt — the intended intervention. - New workspace-visible constants `LARGE_SCOPE_ENTITIES` and `LARGE_SCOPE_BATCHES` in `rpg-mcp` replace duplicated magic numbers across server.rs and tools.rs. diff --git a/README.md b/README.md index a1de686..4e71169 100644 --- a/README.md +++ b/README.md @@ -50,14 +50,15 @@ Once lifted, try: ## Use RPG before `grep`, `cat`, `find` The server instructions tell your agent to reach for RPG tools FIRST for any -question about code structure or behavior. 
That reflex matters — `grep`/`cat`/ -`Read` loops burn tokens and miss semantic relationships RPG already knows. +question about code structure or behavior. That reflex matters — `grep`, `cat`, +and ad-hoc file reads burn tokens and miss semantic relationships RPG already +knows. | If you'd otherwise reach for... | Use this instead | |---|---| | `grep -r` / `rg` (by intent) | `search_node(query="...")` | | `grep -r` / `rg` (by name) | `search_node(query="...", mode="snippets")` | -| `cat` / `Read` a function | `fetch_node(entity_id="file:name")` | +| `cat` / reading a function | `fetch_node(entity_id="file:name")` | | chained greps for callers/callees | `explore_rpg(entity_id="...", direction="...")` | | recursive grep for "what depends on X" | `impact_radius(entity_id="...")` | | `wc -l` / `find` / `tree` | `rpg_info` | @@ -65,7 +66,7 @@ question about code structure or behavior. That reflex matters — `grep`/`cat`/ | manual search → fetch → explore chains | `context_pack(query="...")` | | "how do I refactor X safely" | `plan_change(goal="...")` | -Fall back to `grep` / `cat` / `Read` only when the query is about literal text +Fall back to `grep`, `cat`, or file reads only when the query is about literal text (string search, comments, TODOs, log messages) — not about structure. --- diff --git a/crates/rpg-mcp/src/prompts/server_instructions.md b/crates/rpg-mcp/src/prompts/server_instructions.md index 313e162..1b87c50 100644 --- a/crates/rpg-mcp/src/prompts/server_instructions.md +++ b/crates/rpg-mcp/src/prompts/server_instructions.md @@ -1,7 +1,7 @@ RPG-Encoder: Repository Planning Graph — semantic understanding of any codebase. No API keys or local LLMs needed. YOU are the LLM — you analyze the code directly. -## USE RPG FIRST — BEFORE grep / cat / Read / find +## USE RPG FIRST — BEFORE grep / cat / find / file-reads Any user question about code structure, behavior, relationships, impact, dependencies, or cross-file patterns — reach for RPG tools BEFORE falling back @@ -13,19 +13,19 @@ chained greps. |---|---| | `grep -r` / `rg` (by intent) | `search_node(query="...")` — finds code by what it DOES | | `grep -r` / `rg` (by name/path) | `search_node(query="...", mode="snippets")` | -| `cat file` / Read a function | `fetch_node(entity_id="file:name")` | +| `cat file` / reading a function | `fetch_node(entity_id="file:name")` | | chained greps for "what calls X" | `explore_rpg(entity_id="...", direction="upstream")` | | recursive grep for "what depends on X" | `impact_radius(entity_id="...")` — with edge paths | | `wc -l` / `find` / `tree` | `rpg_info` — counts, hierarchy, inter-area connectivity | | reading the whole repo | `semantic_snapshot` — whole-repo view in one call | -| multi-step search + read + trace | `context_pack(query="...")` — 1 call, 44% fewer tokens | +| multi-step search + read + trace | `context_pack(query="...")` — 1 call instead of 3-5 | | "how do I refactor X safely" | `plan_change(goal="...")` — ordered entities + blast radius | | "find circular dependencies" | `detect_cycles` | | "find god objects / unstable code" | `analyze_health` | | "shortest path between A and B" | `find_paths(source, target)` | | "minimal subgraph connecting these" | `slice_between(entity_ids=[...])` | -**Fall back to grep / cat / Read only when the query is about LITERAL TEXT** +**Fall back to grep / cat / file-reads only when the query is about LITERAL TEXT** (string search, comments, TODO markers, log messages, license headers) — not about structure or semantics. 
This holds even if your training predisposes you toward shell tools; the RPG is cheaper, more accurate, and more complete for @@ -175,7 +175,7 @@ When using the RPG to understand or navigate a codebase (after lifting is comple - Use `fetch_node(fields="features,deps")` to skip source code (~80% smaller output) - Use `explore_rpg(format="compact")` for ID-preserving machine-readable rows (enables direct follow-up calls) - Use `explore_rpg(max_results=N)` to cap large dependency trees -- Use `context_pack` instead of search→fetch→explore chains (1 call vs 3-5, ~44% fewer tokens) +- Use `context_pack` instead of search→fetch→explore chains (1 call vs 3-5) - Use `impact_radius` for richer reachability analysis with edge paths (1 call vs multi-step explore) ## HEALTH ANALYSIS diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index 64e633c..f775f2c 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -156,7 +156,7 @@ impl RpgServer { } #[tool( - description = "PREFER THIS OVER cat/Read FOR A SINGLE ENTITY. Fetch detailed metadata and source code for a known entity by ID. Returns the entity's semantic features (what it does), dependencies (what it calls, what calls it), hierarchy position, and full source code. Use this instead of reading the whole file when you only need one function/class/method.", + description = "PREFER THIS OVER cat OR WHOLE-FILE READS FOR A SINGLE ENTITY. Fetch detailed metadata and source code for a known entity by ID. Returns the entity's semantic features (what it does), dependencies (what it calls, what calls it), hierarchy position, and full source code. Use this instead of reading the whole file when you only need one function/class/method.", annotations(read_only_hint = true, open_world_hint = false) )] async fn fetch_node( @@ -1164,7 +1164,7 @@ impl RpgServer { let batch_tokens = self.config.read().await.encoding.max_batch_tokens; let approx_total_k = (total_batches * batch_tokens).div_ceil(1000); output.push_str(&format!( - "\nNOTE: {} batches queued (~{}K tokens of source total). If your runtime supports sub-agent dispatch or a cheaper model, abort this call and invoke `lifting_status` for the delegation pattern. Continue here only if no dispatch is available.\n\n", + "\nNOTE: {} batches queued (~{}K tokens of source total). If your runtime supports sub-agent dispatch or a cheaper model, stop here — do not request batches 2..N — and invoke `lifting_status` for the delegation pattern. Continue the sequential loop only if no dispatch is available.\n\n", total_batches, approx_total_k, )); } @@ -2596,7 +2596,7 @@ impl RpgServer { } #[tool( - description = "PREFER THIS OVER MANUAL search → fetch → explore CHAINS. Single-call context pack: searches for entities matching your query, fetches their details and source code, expands neighbors to the specified depth (default 1), and trims to a token budget. Returns primary entities with source + features + deps, plus neighborhood entities for broader context. Replaces 3-5 chained tool calls with 1, ~44% fewer tokens.", + description = "PREFER THIS OVER MANUAL search → fetch → explore CHAINS. Single-call context pack: searches for entities matching your query, fetches their details and source code, expands neighbors to the specified depth (default 1), and trims to a token budget. Returns primary entities with source + features + deps, plus neighborhood entities for broader context. 
Replaces 3-5 chained tool calls with 1.", annotations(read_only_hint = true, open_world_hint = false) )] async fn context_pack( From 99e941645e831c9b1d2838a337b5fa282c2f24a8 Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 15:10:50 +0200 Subject: [PATCH 05/19] =?UTF-8?q?fix:=20address=20Codex=20round=202=20?= =?UTF-8?q?=E2=80=94=20CLI=20reload,=20config=20refresh,=20hedged=20dispat?= =?UTF-8?q?ch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex round 2 review of PR #87 found 4 issues. All addressed: HIGH — CLI fallback (`rpg-encoder lift --provider ...`) writes to disk, but the MCP server keeps serving the in-memory graph until reload_rpg is called. Documented across all 4 surfaces (server.rs lifting_status block, server_instructions.md, README, .gemini/commands/rpg-lift.toml): after the CLI finishes, call reload_rpg in this session. MEDIUM — set_project_root swapped the root and reloaded the graph but left self.config pointing at the previous project's config. After switching projects, the server would serve the prior project's batch size and token budget. Now reloads RpgConfig atomically with the swap. MEDIUM — lifting_status checked `remaining >= 100` against the raw pre-auto-lift count. Auto-lift runs inside get_entities_for_lifting and can shrink the LLM-needed set dramatically for repos with many trivial entities. The dashboard could promise delegation for work the auto-lift would finish in zero calls. Reworded the recommendation as conditional ("likely-large workload — call get_entities_for_lifting next; if its batch-0 response includes the delegation NOTE, follow the dispatch pattern"). The batch-0 NOTE sees the post-auto-lift queue and is authoritative. LOW — Server instructions said `rpg_info` returns "No RPG found", but that's a tool error from ensure_graph, not a friendly status string. Changed to "any RPG tool returns 'No RPG found'". 651 tests pass. fmt/clippy clean. Co-Authored-By: Claude Opus 4.6 (1M context) --- .gemini/extensions/rpg/commands/rpg-lift.toml | 2 +- CHANGELOG.md | 25 +++++++++++++++++++ README.md | 2 +- .../src/prompts/server_instructions.md | 10 +++++--- crates/rpg-mcp/src/server.rs | 13 ++++++++-- crates/rpg-mcp/src/tools.rs | 6 +++++ 6 files changed, 50 insertions(+), 8 deletions(-) diff --git a/.gemini/extensions/rpg/commands/rpg-lift.toml b/.gemini/extensions/rpg/commands/rpg-lift.toml index 96ddf43..b50ec62 100644 --- a/.gemini/extensions/rpg/commands/rpg-lift.toml +++ b/.gemini/extensions/rpg/commands/rpg-lift.toml @@ -18,5 +18,5 @@ The lift pipeline: Progress is saved after each batch. If interrupted, re-running continues from where it stopped. -After completion, run `rpg-encoder info` to show the updated graph statistics. +After completion, run `rpg-encoder info` to show the updated graph statistics. If an MCP RPG server is currently connected to your editor, also call its `reload_rpg` tool so it picks up the new graph from disk. """ diff --git a/CHANGELOG.md b/CHANGELOG.md index e6986d7..7bd3320 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/). 
## [0.8.3] - 2026-04-14 +### Fixed (Codex round 2 review) + +- **CLI fallback (`rpg-encoder lift --provider ...`) leaves the MCP server + on a stale in-memory graph.** All four surfaces that mention the CLI + fallback (server.rs lifting_status block, server_instructions.md, + README, .gemini commands) now explicitly say "after the CLI finishes, + call `reload_rpg` in this session" so the server picks up the new + `.rpg/graph.json` from disk. +- **`set_project_root` failed to refresh `self.config`.** When switching + to a project with a different `.rpg/config.toml`, the server kept + serving the previous project's `encoding.max_batch_tokens` and other + settings. Now reloads `RpgConfig` from the new root atomically with the + root swap. +- **`lifting_status` large-scope NEXT STEP could promise delegation work + the auto-lifter would actually finish locally.** The check uses raw + `remaining` (pre-auto-lift), so on repos with many trivial entities + (getters, setters, constructors) the dashboard could recommend a worker + for ~0 LLM calls. Reworded to "likely-large workload — call + get_entities_for_lifting next; if its batch-0 response includes the + delegation NOTE, follow the dispatch pattern below". The batch-0 NOTE + is the authoritative signal because it sees the post-auto-lift queue. +- **"`rpg_info` says 'No RPG found'" was wrong.** `rpg_info` returns a + tool error, not a friendly status string. Changed to "any RPG tool + returns 'No RPG found'". + ### Changed - **`lifting_status` NEXT STEP now recommends sub-agent dispatch when the diff --git a/README.md b/README.md index 4e71169..ce72302 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Then open any repo and tell your agent: Your agent handles everything: indexes entities (seconds), reads each function and adds intent-level features (a few minutes), organizes them into a semantic hierarchy, and commits `.rpg/graph.json` for your team. -For repos with ~100+ entities, `lifting_status` will tell your agent to delegate the lifting loop to a sub-agent or a cheaper model — feature extraction is pattern-matching, not novel reasoning. If your runtime has no sub-agent mechanism, run `rpg-encoder lift --provider anthropic|openai` from the terminal with an API key — the CLI drives an external LLM directly with no agent involvement. +For repos with ~100+ entities, `lifting_status` will tell your agent to delegate the lifting loop to a sub-agent or a cheaper model — feature extraction is pattern-matching, not novel reasoning. If your runtime has no sub-agent mechanism, run `rpg-encoder lift --provider anthropic|openai` from the terminal with an API key — the CLI drives an external LLM directly with no agent involvement. After the CLI finishes, call `reload_rpg` in your session to load the updated graph. Once lifted, try: diff --git a/crates/rpg-mcp/src/prompts/server_instructions.md b/crates/rpg-mcp/src/prompts/server_instructions.md index 1b87c50..758e934 100644 --- a/crates/rpg-mcp/src/prompts/server_instructions.md +++ b/crates/rpg-mcp/src/prompts/server_instructions.md @@ -31,9 +31,9 @@ about structure or semantics. This holds even if your training predisposes you toward shell tools; the RPG is cheaper, more accurate, and more complete for every structural question. -If a graph does not exist yet (`rpg_info` says "No RPG found"), run `build_rpg` -first. If entities are unlifted and the scope is large, see the LIFTING FLOW -below for delegation guidance. 
+If a graph does not exist yet (any RPG tool returns "No RPG found"), run +`build_rpg` first. If entities are unlifted and the scope is large, see the +LIFTING FLOW below for delegation guidance. ## LIFTING FLOW (step by step) @@ -136,7 +136,9 @@ Fallbacks when no delegation mechanism is available: - **Scoped lifting**: narrow each call, e.g. `get_entities_for_lifting(scope="src/auth/**")`, then `finalize_lifting`. Each scope fits in foreground context. - **CLI autonomous lift**: `rpg-encoder lift --provider anthropic|openai` uses an - external API key directly — no agent subscription involvement. + external API key directly — no agent subscription involvement. **After the CLI + finishes, call `reload_rpg` in this session** so the server picks up the updated + `.rpg/graph.json` — otherwise subsequent queries will still see the pre-lift state. After delegation returns, call `get_files_for_synthesis` + `submit_file_syntheses`, then `build_semantic_hierarchy` + `submit_hierarchy`. diff --git a/crates/rpg-mcp/src/server.rs b/crates/rpg-mcp/src/server.rs index 6a378d8..c7bb3a9 100644 --- a/crates/rpg-mcp/src/server.rs +++ b/crates/rpg-mcp/src/server.rs @@ -589,9 +589,18 @@ impl RpgServer { // Large repo — recommend delegating the mechanical loop so the // caller doesn't exhaust its own context. Keep NEXT STEP on one // line; follow-up detail is in labeled blocks below. + // + // Note: `remaining` is the raw unlifted count *before* auto-lift + // runs (which happens inside get_entities_for_lifting). Auto-lift + // shrinks the LLM-needed set considerably for repos with many + // trivial entities (getters, setters, constructors). The dispatch + // hint here is therefore conditional — get_entities_for_lifting + // batch 0 emits a confirming NOTE only when ≥10 LLM batches + // actually queue up. If the agent calls it and sees no NOTE, + // the work is small enough to lift directly. let batch_tokens = self.config.read().await.encoding.max_batch_tokens; out.push_str(&format!( - "NEXT STEP: Delegate lifting to a sub-agent or cheaper model — {} entities remain, each batch returns ~{}K tokens of source.\n", + "NEXT STEP: Likely-large lifting workload — {} entities remain (auto-lift may reduce this). Call get_entities_for_lifting(scope=\"*\") next; if its batch-0 response includes a delegation NOTE, follow the dispatch pattern below. Each remaining batch is ~{}K tokens of source.\n", remaining, batch_tokens.div_ceil(1000), )); out.push_str( @@ -602,7 +611,7 @@ impl RpgServer { \nFALLBACK (no sub-agent mechanism, no API key):\n \ Scope the lift to one subtree at a time — e.g. get_entities_for_lifting(scope=\"src/auth/**\") — and call finalize_lifting after each. Each scoped batch fits in foreground context.\n\ \nFALLBACK (no sub-agent mechanism, API key available):\n \ - Run `rpg-encoder lift --provider anthropic` (or `openai`) from the terminal — the CLI drives an external LLM directly with no agent involvement.\n", + Run `rpg-encoder lift --provider anthropic` (or `openai`) from the terminal — the CLI drives an external LLM directly with no agent involvement. 
After the CLI finishes, call `reload_rpg` in this session so the server picks up the updated graph from disk.\n", ); } else if lifted == 0 { out.push_str( diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index f775f2c..1211413 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -443,6 +443,12 @@ impl RpgServer { // Swap the root *self.project_root_cell.write().await = canonical.clone(); + // Reload the config from the new project — encoding.max_batch_tokens + // and other project-scoped settings must follow the root or tools + // will operate with the previous project's configuration. + *self.config.write().await = + rpg_core::config::RpgConfig::load(&canonical).unwrap_or_default(); + // Reset all session + sync state — everything is project-scoped *self.lifting_session.write().await = None; *self.hierarchy_session.write().await = None; From 3eea0bcbb16b1614f736b64ffe68e25eb261c2e9 Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 15:24:12 +0200 Subject: [PATCH 06/19] =?UTF-8?q?fix:=20address=20Codex=20round=203=20?= =?UTF-8?q?=E2=80=94=20reload=20after=20delegation,=20reload=5Frpg=20refre?= =?UTF-8?q?shes=20config?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex round 3 review of PR #87 found 3 more issues. All addressed: HIGH — The dispatch happy path assumes the worker's tool calls update the same in-memory graph the caller reads. That's true when the runtime shares the MCP session (e.g., Claude Code Task), but not guaranteed — some runtimes give sub-agents isolated MCP sessions, in which case the worker writes to disk via submit_lift_results but the parent keeps serving a stale in-memory graph. Documented across server.rs and server_instructions.md: "after the worker returns, call reload_rpg". No-op when sessions are shared, required when isolated. MEDIUM — reload_rpg only reloaded the graph, not the config. Parallel to the set_project_root fix from the prior round. If .rpg/config.toml was edited externally (or rewritten by the lifter), batch_size and max_batch_tokens stayed cached at the old values. reload_rpg now also reloads RpgConfig from disk. Tool description updated to reflect the broader scope. LOW — "any RPG tool returns 'No RPG found'" overstated the consistency of the error string. Different code paths return slightly different messages. Reworded to "RPG tools error with messages like 'No RPG found' or 'graph: not built'" — describes the pattern rather than promising a specific string. 651 tests pass. fmt/clippy clean. Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/rpg-mcp/src/prompts/server_instructions.md | 12 ++++++++---- crates/rpg-mcp/src/server.rs | 2 +- crates/rpg-mcp/src/tools.rs | 10 ++++++++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/crates/rpg-mcp/src/prompts/server_instructions.md b/crates/rpg-mcp/src/prompts/server_instructions.md index 758e934..a0efafb 100644 --- a/crates/rpg-mcp/src/prompts/server_instructions.md +++ b/crates/rpg-mcp/src/prompts/server_instructions.md @@ -31,9 +31,10 @@ about structure or semantics. This holds even if your training predisposes you toward shell tools; the RPG is cheaper, more accurate, and more complete for every structural question. -If a graph does not exist yet (any RPG tool returns "No RPG found"), run -`build_rpg` first. If entities are unlifted and the scope is large, see the -LIFTING FLOW below for delegation guidance. 
+If a graph does not exist yet (RPG tools error with messages like "No RPG +found" or "graph: not built"), run `build_rpg` first. If entities are +unlifted and the scope is large, see the LIFTING FLOW below for delegation +guidance. ## LIFTING FLOW (step by step) @@ -130,7 +131,10 @@ finalize_lifting ``` Use whatever sub-agent or cheaper-model mechanism your runtime exposes. The graph -persists to disk, so the worker's tool calls update the same state the caller reads. +persists to disk after every submit, so the worker's writes survive. **After the +worker returns, call `reload_rpg`** to refresh the caller's in-memory graph — +required if the runtime gave the worker an isolated MCP session, no-op if it +shared yours. Fallbacks when no delegation mechanism is available: - **Scoped lifting**: narrow each call, e.g. `get_entities_for_lifting(scope="src/auth/**")`, diff --git a/crates/rpg-mcp/src/server.rs b/crates/rpg-mcp/src/server.rs index c7bb3a9..9b07e27 100644 --- a/crates/rpg-mcp/src/server.rs +++ b/crates/rpg-mcp/src/server.rs @@ -607,7 +607,7 @@ impl RpgServer { "\nLOOP (run in the delegated context):\n \ get_entities_for_lifting(scope=\"*\") -> analyze batch -> submit_lift_results -> repeat until DONE -> finalize_lifting\n\ \nDISPATCH:\n \ - Use whatever sub-agent / cheaper-model mechanism your runtime provides. The MCP graph is persisted to disk, so the worker's tool calls update the same state the caller reads.\n\ + Use whatever sub-agent / cheaper-model mechanism your runtime provides. The MCP graph persists to disk after every submit, so the worker's writes survive. **After the worker returns, call `reload_rpg`** — some runtimes give sub-agents an isolated MCP session, in which case the caller's in-memory graph is stale until reloaded. (No-op if your runtime shares the MCP session.)\n\ \nFALLBACK (no sub-agent mechanism, no API key):\n \ Scope the lift to one subtree at a time — e.g. get_entities_for_lifting(scope=\"src/auth/**\") — and call finalize_lifting after each. Each scoped batch fits in foreground context.\n\ \nFALLBACK (no sub-agent mechanism, API key available):\n \ diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index 1211413..5a75077 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -2023,10 +2023,16 @@ impl RpgServer { } #[tool( - description = "Reload the RPG graph from disk. Use after external changes to .rpg/graph.json." + description = "Reload the RPG graph and config from disk. Use after external changes to .rpg/graph.json or .rpg/config.toml — for example, after the CLI ran `rpg-encoder lift` or after editing batch-size settings." )] async fn reload_rpg(&self) -> Result { - match storage::load(&self.project_root().await) { + let project_root = self.project_root().await; + // Refresh config from disk too — if the user edited .rpg/config.toml + // or the lifter wrote new settings, pick them up here so subsequent + // tool calls operate against current values. 
+ *self.config.write().await = + rpg_core::config::RpgConfig::load(&project_root).unwrap_or_default(); + match storage::load(&project_root) { Ok(g) => { let entities = g.metadata.total_entities; *self.graph.write().await = Some(g); From a981fc9545ee3d2c8bf70f18055b23f483229489 Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 15:45:42 +0200 Subject: [PATCH 07/19] =?UTF-8?q?feat:=20drift=20maintenance=20=E2=80=94?= =?UTF-8?q?=20auto-sync=20notice=20now=20actively=20prescribes=20re-lift?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Real-world session showed an agent committing new code repeatedly without re-lifting, even though the auto-sync notice reported "N need lifting" after each commit. The agent treated the count as informational. The user had to manually intervene with "did you lift?" to trigger a Haiku re-lift. Three changes to make drift actionable: 1. Auto-sync notice is now active, not informational. Examples: [auto-synced: +5 -0 ~3 entities; 5 new entities unlifted — semantic search is now incomplete; call lifting_status to refresh] [auto-synced: +120 -0 ~0 entities; 120 new entities unlifted — semantic search is now incomplete; call lifting_status for re-lift dispatch] [auto-synced: +2 -0 ~10 entities; 2 new + 10 stale features — semantic search is now incomplete; call lifting_status to refresh] New entities and stale features are reported separately so the agent sees the difference between "you added code" and "you modified code". 2. New "DRIFT MAINTENANCE" section in server_instructions.md explains the notice variants and frames re-lift as part of "definition of done" for any task that wrote code — the same way tests are. 3. submit_lift_results NEXT action is now scale-aware. When remaining count after a batch ≥ LARGE_SCOPE_ENTITIES, it points back at lifting_status for the dispatch pattern rather than encouraging another foreground batch. 651 tests pass. fmt/clippy clean. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 20 +++++++++++++ .../src/prompts/server_instructions.md | 24 ++++++++++++++++ crates/rpg-mcp/src/server.rs | 28 +++++++++++++++++-- crates/rpg-mcp/src/tools.rs | 14 ++++++++-- 4 files changed, 82 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7bd3320..c0aec14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/). ## [0.8.3] - 2026-04-14 +### Added (drift maintenance — re-lift after code changes) + +- **Auto-sync notice now actively recommends re-lift** when entities have + drifted. Previous notice was informational (`"; N need lifting"`) and + agents treated it as such — they read the count and moved on. New notice + separates new-entity drift from stale-feature drift and includes a verb + ("call lifting_status to refresh" or, at large scale, "...for re-lift + dispatch"). The change targets the specific failure mode where an agent + commits new code, sees the drift count, and continues to the next user + request without re-lifting — leaving semantic search incomplete for + downstream sessions. +- **New "DRIFT MAINTENANCE" section in `server_instructions.md`** explains + the three notice variants (`new entities unlifted`, `stale features`, + both) and frames re-lift as part of "definition of done" for any task + that wrote code — the same way running tests is. 
+- **`submit_lift_results` NEXT action is now scale-aware**: when the + remaining count after a batch ≥ `LARGE_SCOPE_ENTITIES`, it points the + caller back at `lifting_status` for the dispatch recommendation rather + than encouraging another foreground batch. + ### Fixed (Codex round 2 review) - **CLI fallback (`rpg-encoder lift --provider ...`) leaves the MCP server diff --git a/crates/rpg-mcp/src/prompts/server_instructions.md b/crates/rpg-mcp/src/prompts/server_instructions.md index a0efafb..adf0dfb 100644 --- a/crates/rpg-mcp/src/prompts/server_instructions.md +++ b/crates/rpg-mcp/src/prompts/server_instructions.md @@ -36,6 +36,30 @@ found" or "graph: not built"), run `build_rpg` first. If entities are unlifted and the scope is large, see the LIFTING FLOW below for delegation guidance. +## DRIFT MAINTENANCE — re-lift after code changes + +Lifting is stateful. The auto-sync notice at the top of every navigation +response tells you when entities have drifted: + +- `[auto-synced: ... ; N new entities unlifted ...]` — code was added, + semantic features are missing for it. `search_node` and `semantic_snapshot` + will not surface those entities. +- `[auto-synced: ... ; N entities have stale features ...]` — code was + modified after lift, the cached features no longer reflect the current + source. Search results may mislead. +- `[auto-synced: ... ; N new + M stale features ...]` — both. + +**Treat any drift notice as a deferred re-lift task.** Lifting is part of +"definition of done" for any change that adds or modifies code, the same +way running tests is. If you write code, you re-lift before reporting +complete. If a tool response shows drift you didn't introduce (e.g., +external commits), call `lifting_status` for the recommended re-lift +pattern. + +The fastest discipline: at the END of any task that wrote code, dispatch +a sub-agent to re-lift in the background — it costs ~zero of your context +and keeps semantic search accurate for the next user request. + ## LIFTING FLOW (step by step) 1. `build_rpg` — index the codebase (if no graph exists) diff --git a/crates/rpg-mcp/src/server.rs b/crates/rpg-mcp/src/server.rs index 9b07e27..1c4a19f 100644 --- a/crates/rpg-mcp/src/server.rs +++ b/crates/rpg-mcp/src/server.rs @@ -272,13 +272,37 @@ impl RpgServer { let (lifted, total) = graph.lifting_coverage(); let needs_lifting = total - lifted; let needs_relift = summary.modified_entity_ids.len(); + let total_drift = needs_lifting + needs_relift; let mut notice = format!( "[auto-synced: +{} -{} ~{} entities", summary.entities_added, summary.entities_removed, summary.entities_modified, ); - if needs_lifting > 0 || needs_relift > 0 { - notice.push_str(&format!("; {} need lifting", needs_lifting + needs_relift)); + + if total_drift > 0 { + if needs_lifting > 0 && needs_relift > 0 { + notice.push_str(&format!( + "; {} new + {} stale features", + needs_lifting, needs_relift, + )); + } else if needs_lifting > 0 { + notice.push_str(&format!("; {} new entities unlifted", needs_lifting)); + } else { + notice.push_str(&format!( + "; {} entities have stale features (modified since last lift)", + needs_relift, + )); + } + // Active recommendation — don't let the agent treat this as informational. 
+ if total_drift >= crate::LARGE_SCOPE_ENTITIES { + notice.push_str( + " — semantic search is now incomplete; call lifting_status for re-lift dispatch", + ); + } else { + notice.push_str( + " — semantic search is now incomplete; call lifting_status to refresh", + ); + } } notice.push_str("]\n\n"); notice diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index 5a75077..0a029fa 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -1544,9 +1544,19 @@ impl RpgServer { } } - // NEXT action + // NEXT action — scale-aware so the caller doesn't burn its context + // grinding through batches when delegation would cost zero of its + // tokens. Mirrors the threshold used in lifting_status. if lifted < total { - result.push_str("\nNEXT: continue with get_entities_for_lifting, then call finalize_lifting when done."); + let remaining = total - lifted; + if remaining >= crate::LARGE_SCOPE_ENTITIES { + result.push_str(&format!( + "\nNEXT: {} entities still unlifted — call lifting_status for the recommended re-lift dispatch (likely a sub-agent / cheaper model in your runtime). Continue here only if no dispatch mechanism is available.", + remaining, + )); + } else { + result.push_str("\nNEXT: continue with get_entities_for_lifting, then call finalize_lifting when done."); + } } else { result.push_str("\nDONE: all entities lifted. Call finalize_lifting to build the semantic hierarchy."); } From bd3f7c68ca94463137120e37b2cddde96ad8d686 Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 15:55:48 +0200 Subject: [PATCH 08/19] =?UTF-8?q?fix:=20address=20Codex=20round=204=20?= =?UTF-8?q?=E2=80=94=20drift=20labeling,=20finalize=5Flifting=20timing,=20?= =?UTF-8?q?threshold=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HIGH — Auto-sync notice mislabelled the unlifted count. I used (total - lifted), which is the global backlog, but rendered it as "N new entities unlifted". A small edit on a partially-lifted repo could claim 50 new entities when only 1 was actually new. Now reports per-update delta separately ("+N added unlifted, ~M stale") and notes any pre-existing backlog as "(+P pre-existing)". HIGH — finalize_lifting guidance in the no-dispatch FALLBACK was wrong. It said to call finalize_lifting after each scoped subtree, but finalize_lifting auto-routes pending entities and locks the hierarchy in. Calling it mid-flow against incomplete signals would bake in bad routing. Fixed in both server.rs and server_instructions.md to "call finalize_lifting ONCE at the very end". MEDIUM — LARGE_SCOPE_ENTITIES (lifting_status heuristic) and LARGE_SCOPE_BATCHES (get_entities_for_lifting batch-0 authority) can diverge under tuned config. Documented explicitly: dashboard is a heuristic gate, batch-0 NOTE is authoritative; they defer to each other in messaging when they disagree. 651 tests pass. fmt/clippy clean. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 22 ++++++++ crates/rpg-mcp/src/main.rs | 25 +++++---- .../src/prompts/server_instructions.md | 5 +- crates/rpg-mcp/src/server.rs | 53 +++++++++++-------- 4 files changed, 74 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0aec14..b7c14f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/). 
## [0.8.3] - 2026-04-14 +### Fixed (Codex round 4 review) + +- **Auto-sync notice mislabelled the unlifted count.** `total - lifted` + is the global backlog, not the per-update delta. Saying "N new entities + unlifted" against that number meant a one-line edit on a partially-lifted + repo could claim "50 new entities unlifted" when only 1 was actually new. + Now reports the per-update delta separately (`+N added unlifted, ~M stale`) + and notes any pre-existing backlog as `(+P pre-existing)`. +- **`finalize_lifting` guidance was wrong for scoped fallback.** The + no-dispatch fallback said to call `finalize_lifting` after each scoped + subtree. But `finalize_lifting` auto-routes pending entities and locks + the hierarchy — calling it mid-flow against incomplete signals would + bake in bad routing decisions. Corrected to "call finalize_lifting ONCE + at the very end after all scopes complete" across server.rs and + server_instructions.md. +- **Heuristic divergence between `LARGE_SCOPE_ENTITIES` and + `LARGE_SCOPE_BATCHES` is now documented explicitly.** Both constants + carry doc comments explaining that the batch-0 NOTE in + `get_entities_for_lifting` is the authoritative dispatch decision; the + dashboard `lifting_status` is a heuristic gate that defers to the NOTE + when they disagree. + ### Added (drift maintenance — re-lift after code changes) - **Auto-sync notice now actively recommends re-lift** when entities have diff --git a/crates/rpg-mcp/src/main.rs b/crates/rpg-mcp/src/main.rs index 705e28b..cf98eff 100644 --- a/crates/rpg-mcp/src/main.rs +++ b/crates/rpg-mcp/src/main.rs @@ -9,17 +9,24 @@ mod server; mod tools; mod types; -/// Entity count above which direct foreground lifting is discouraged. -/// Caller-side context cost grows linearly with entity count × batch token -/// budget; past this threshold the caller should delegate to a sub-agent -/// or cheaper model rather than loop locally. +/// Entity count above which `lifting_status` and similar dashboards switch to +/// recommending sub-agent delegation. **This is a heuristic gate, not the +/// authoritative dispatch decision.** The authoritative signal is the +/// batch-0 NOTE in `get_entities_for_lifting`, which sees the post-auto-lift +/// queue and uses the actual token-aware batch count. With user-tuned +/// `max_batch_tokens` or unusually small/large entities, the two can +/// diverge — when they do, the batch-0 NOTE wins. Both messages defer to +/// each other: dashboard says "check the NOTE", batch-0 NOTE is silent +/// when delegation isn't warranted. pub(crate) const LARGE_SCOPE_ENTITIES: usize = 100; -/// Batch count above which the batch-0 response includes a dispatch note. -/// Derived from `LARGE_SCOPE_ENTITIES` assuming ~10 entities per token-aware -/// batch at default config (batch_size=25, max_batch_tokens=8000). Kept as a -/// separate constant because the auto-lifter shrinks the LLM-needed set -/// before batching, so the ratio is conservative. +/// Batch count above which `get_entities_for_lifting` emits the batch-0 +/// dispatch note. Derived from `LARGE_SCOPE_ENTITIES` assuming ~10 entities +/// per token-aware batch at default config (batch_size=25, +/// max_batch_tokens=8000). Kept as a separate constant because the +/// auto-lifter shrinks the LLM-needed set before batching, so the ratio is +/// conservative. Authoritative for the dispatch decision (see +/// `LARGE_SCOPE_ENTITIES` for why). 
pub(crate) const LARGE_SCOPE_BATCHES: usize = 10; use anyhow::Result; diff --git a/crates/rpg-mcp/src/prompts/server_instructions.md b/crates/rpg-mcp/src/prompts/server_instructions.md index adf0dfb..8e66cc4 100644 --- a/crates/rpg-mcp/src/prompts/server_instructions.md +++ b/crates/rpg-mcp/src/prompts/server_instructions.md @@ -162,7 +162,10 @@ shared yours. Fallbacks when no delegation mechanism is available: - **Scoped lifting**: narrow each call, e.g. `get_entities_for_lifting(scope="src/auth/**")`, - then `finalize_lifting`. Each scope fits in foreground context. + and submit features per batch. Each scope fits in foreground context. Call + `finalize_lifting` ONCE after all scopes are complete — calling it mid-flow + auto-routes pending entities against incomplete signals and locks the + hierarchy in early. - **CLI autonomous lift**: `rpg-encoder lift --provider anthropic|openai` uses an external API key directly — no agent subscription involvement. **After the CLI finishes, call `reload_rpg` in this session** so the server picks up the updated diff --git a/crates/rpg-mcp/src/server.rs b/crates/rpg-mcp/src/server.rs index 1c4a19f..beffc2b 100644 --- a/crates/rpg-mcp/src/server.rs +++ b/crates/rpg-mcp/src/server.rs @@ -270,37 +270,48 @@ impl RpgServer { } let (lifted, total) = graph.lifting_coverage(); - let needs_lifting = total - lifted; - let needs_relift = summary.modified_entity_ids.len(); - let total_drift = needs_lifting + needs_relift; + let total_unlifted = total - lifted; + let added_now = summary.entities_added; + let stale_now = summary.modified_entity_ids.len(); + let aggregate_drift = total_unlifted + stale_now; let mut notice = format!( "[auto-synced: +{} -{} ~{} entities", summary.entities_added, summary.entities_removed, summary.entities_modified, ); - if total_drift > 0 { - if needs_lifting > 0 && needs_relift > 0 { - notice.push_str(&format!( - "; {} new + {} stale features", - needs_lifting, needs_relift, - )); - } else if needs_lifting > 0 { - notice.push_str(&format!("; {} new entities unlifted", needs_lifting)); - } else { - notice.push_str(&format!( - "; {} entities have stale features (modified since last lift)", - needs_relift, - )); + if aggregate_drift > 0 { + // Per-update delta — what THIS sync changed + let mut parts: Vec = Vec::new(); + if added_now > 0 { + parts.push(format!("+{} added unlifted", added_now)); + } + if stale_now > 0 { + parts.push(format!("~{} stale features", stale_now)); + } + if !parts.is_empty() { + notice.push_str("; "); + notice.push_str(&parts.join(", ")); + } + // Pre-existing backlog (entities that were already unlifted before this update) + let pre_existing = total_unlifted.saturating_sub(added_now); + if pre_existing > 0 { + if parts.is_empty() { + notice.push_str(&format!("; {} unlifted total", pre_existing)); + } else { + notice.push_str(&format!(" (+{} pre-existing)", pre_existing)); + } } - // Active recommendation — don't let the agent treat this as informational. - if total_drift >= crate::LARGE_SCOPE_ENTITIES { + // Active recommendation — graded by aggregate severity. The + // batch-0 NOTE in get_entities_for_lifting is authoritative + // for the dispatch decision; this is a heuristic gate. 
+ if aggregate_drift >= crate::LARGE_SCOPE_ENTITIES { notice.push_str( - " — semantic search is now incomplete; call lifting_status for re-lift dispatch", + " — semantic search is incomplete; call lifting_status for re-lift dispatch guidance", ); } else { notice.push_str( - " — semantic search is now incomplete; call lifting_status to refresh", + " — semantic search is incomplete; call lifting_status to refresh", ); } } @@ -633,7 +644,7 @@ impl RpgServer { \nDISPATCH:\n \ Use whatever sub-agent / cheaper-model mechanism your runtime provides. The MCP graph persists to disk after every submit, so the worker's writes survive. **After the worker returns, call `reload_rpg`** — some runtimes give sub-agents an isolated MCP session, in which case the caller's in-memory graph is stale until reloaded. (No-op if your runtime shares the MCP session.)\n\ \nFALLBACK (no sub-agent mechanism, no API key):\n \ - Scope the lift to one subtree at a time — e.g. get_entities_for_lifting(scope=\"src/auth/**\") — and call finalize_lifting after each. Each scoped batch fits in foreground context.\n\ + Scope the lift to one subtree at a time — e.g. get_entities_for_lifting(scope=\"src/auth/**\") — and submit features per batch within that scope. Each scoped batch fits in foreground context. Call finalize_lifting ONCE at the very end after all scopes are complete; calling it mid-flow auto-routes pending entities against incomplete signals and locks the hierarchy in early.\n\ \nFALLBACK (no sub-agent mechanism, API key available):\n \ Run `rpg-encoder lift --provider anthropic` (or `openai`) from the terminal — the CLI drives an external LLM directly with no agent involvement. After the CLI finishes, call `reload_rpg` in this session so the server picks up the updated graph from disk.\n", ); From f176b28b32d671021542549b581098ec14469b6c Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 16:15:06 +0200 Subject: [PATCH 09/19] =?UTF-8?q?fix:=20address=20Codex=20round=205=20?= =?UTF-8?q?=E2=80=94=20stale-feature=20drift=20now=20drives=20lifting=5Fst?= =?UTF-8?q?atus?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five findings, all threading off the same omission: the server *knew* which entities had been modified since last lift (via `summary.modified_entity_ids` on each `auto_sync_if_stale`) but never persisted that knowledge anywhere visible to the lifting state machine. So `lifting_status` reported "100% coverage" the moment the last ever-unlifted entity got features, even if half the repo had been modified since; and `get_entities_for_lifting(*)` would return zero entities to a caller trying to re-lift the staleness it had just been told about. Changes: - New persistent `stale_entity_ids: Arc>>` on RpgServer. Populated from `summary.modified_entity_ids` after each auto-sync, drained by `submit_lift_results` as entities get re-lifted, reset on `reload_rpg` and `set_project_root`. - `lifting_status` header now shows `stale_features: N entities modified since last lift` when the set is non-empty. NEXT STEP state machine compares `remaining + stale_features` against `LARGE_SCOPE_ENTITIES` and has dedicated branches for "unlifted + stale mixed" and "stale only". - `get_entities_for_lifting(scope="*")` augments its resolved scope with tracked stale entities (resolve_scope filters to `features.is_empty()` which would otherwise skip them). 
The auto-lift skip check now treats stale entities as if they had no features so they flow into `needs_llm` and come back through the normal LLM loop. - Large-scope NEXT STEP no longer bounces the caller through `get_entities_for_lifting` before seeing the dispatch recommendation. LOOP / DISPATCH / FALLBACK blocks are emitted directly by `lifting_status`, so the caller delegates without first loading a batch payload into its own context. - `set_project_root` and `reload_rpg` now use a `reload_config_with_warning` helper that distinguishes "no .rpg/config.toml" (silent default) from "present but malformed" (stderr warning, keeps previous in-memory config). Verified with `cargo fmt --all`, `cargo clippy --workspace --all-targets -- -D warnings`, `cargo test --workspace --lib`, and the rpg-mcp integration tests — all green. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 38 ++++++++++ crates/rpg-mcp/src/server.rs | 133 +++++++++++++++++++++++++++++++---- crates/rpg-mcp/src/tools.rs | 87 +++++++++++++++++++---- 3 files changed, 231 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7c14f6..604b8c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,44 @@ and this project adheres to [Semantic Versioning](https://semver.org/). ## [0.8.3] - 2026-04-14 +### Fixed (Codex round 5 review) + +- **`lifting_status` could not see stale-feature drift.** Coverage reached + 100% after sync but entities with modified sources still had outdated + features; the dashboard reported "Graph is complete" and sent callers off + to search. Added a persistent `stale_entity_ids: Arc>` on + `RpgServer` populated from `summary.modified_entity_ids` on every + `auto_sync_if_stale`, drained by `submit_lift_results` as entities get + re-lifted, and cleared on `reload_rpg` / `set_project_root`. Surfaced in + the header as `stale_features: N entities modified since last lift` and + factored into the NEXT STEP state machine. +- **`get_entities_for_lifting(scope="*")` silently skipped stale entities.** + `resolve_scope` filters to entities with no features, and the auto-lift + skip check short-circuits anything that already has features — so a + caller following the "re-lift stale entities" recipe would get back zero + entities even though the dashboard said N needed work. Now the server + snapshots `stale_entity_ids` before taking graph locks, augments the + resolved scope with stale entities when scope is `*`/`all`, and routes + stale entities into `needs_llm` regardless of existing features. +- **Circular delegation flow from `lifting_status`.** The large-scope NEXT + STEP sent callers to `get_entities_for_lifting(scope="*")` first, whose + batch-0 response then said "dispatch a sub-agent" — burning the caller's + context on a batch payload before they ever saw the dispatch + recommendation. Now `lifting_status` emits the LOOP/DISPATCH/FALLBACK + blocks directly, so the caller delegates without first loading a batch of + source. +- **NEXT STEP only branched on `remaining` (unlifted count).** When + `total - lifted == 0` but `stale_features_count > 0`, the old logic + skipped straight to hierarchy or "graph is complete" and never prompted + re-lift. The state machine now considers `remaining + stale_features` + together — dedicated branches cover "unlifted + stale mixed", "stale + only", and the large-scope threshold compares against the combined total. 
+- **Silent config load swallow.** `set_project_root` and `reload_rpg` used + `unwrap_or_default()` on config loads, collapsing "missing config file" + (expected) and "malformed TOML" (user error) into identical behavior. Added + `reload_config_with_warning` helper that logs a stderr warning when parse + fails so misconfigurations surface instead of silently reverting to defaults. + ### Fixed (Codex round 4 review) - **Auto-sync notice mislabelled the unlifted count.** `total - lifted` diff --git a/crates/rpg-mcp/src/server.rs b/crates/rpg-mcp/src/server.rs index beffc2b..4739e28 100644 --- a/crates/rpg-mcp/src/server.rs +++ b/crates/rpg-mcp/src/server.rs @@ -56,6 +56,12 @@ pub(crate) struct RpgServer { pub(crate) prompt_versions: PromptVersions, /// Last git HEAD SHA at which auto-sync ran. Prevents redundant updates. pub(crate) last_auto_sync_head: Arc>>, + /// Entity IDs whose source was modified after their features were lifted. + /// Populated by `auto_sync_if_stale` (from `summary.modified_entity_ids`), + /// drained by `submit_lift_results` as entities get re-lifted. Lets + /// `lifting_status` surface stale-feature drift even though those entities + /// still appear "lifted" in the coverage count. + pub(crate) stale_entity_ids: Arc>>, /// Hash of the last-synced workdir changeset (dirty files + their stat). /// Combined with `last_auto_sync_head` to detect when a re-sync is needed /// for uncommitted/staged/unstaged changes. @@ -84,6 +90,35 @@ impl RpgServer { self.project_root_cell.read().await.clone() } + /// Reload `.rpg/config.toml` into the given config slot. + /// - File missing → silently use defaults (the no-config-yet case). + /// - File present but malformed → log a warning, keep the existing config. + /// - File present and valid → swap. + pub(crate) async fn reload_config_with_warning( + slot: &Arc>, + project_root: &std::path::Path, + ) { + let config_path = project_root.join(".rpg/config.toml"); + if !config_path.exists() { + // Missing is a normal state — use defaults silently. + *slot.write().await = RpgConfig::default(); + return; + } + match RpgConfig::load(project_root) { + Ok(cfg) => { + *slot.write().await = cfg; + } + Err(e) => { + eprintln!( + "rpg: failed to parse {} ({}); keeping previous in-memory config", + config_path.display(), + e + ); + // Do NOT overwrite — leave the previous (working) config in place. + } + } + } + /// Create a new server, loading graph and config from `project_root` if present. pub(crate) fn new(project_root: PathBuf) -> Self { let graph = storage::load(&project_root).ok(); @@ -107,6 +142,7 @@ impl RpgServer { tool_router: Self::create_tool_router(), prompt_versions: PromptVersions::new(), last_auto_sync_head: Arc::new(RwLock::new(initial_head)), + stale_entity_ids: Arc::new(RwLock::new(std::collections::HashSet::new())), last_auto_sync_changeset: Arc::new(RwLock::new(None)), last_auto_sync_workdir_paths: Arc::new(RwLock::new(std::collections::HashSet::new())), lift_in_progress: Arc::new(std::sync::atomic::AtomicBool::new(false)), @@ -262,6 +298,20 @@ impl RpgServer { *self.last_auto_sync_changeset.write().await = Some(current_changeset); *self.last_auto_sync_workdir_paths.write().await = current_paths; + // Persist stale entity IDs so lifting_status can surface + // stale-feature drift in subsequent calls. These entities + // still count as "lifted" by coverage(), so without this + // set, lifting_status would report "100% coverage" while + // search_node returns outdated features. 
+ { + let mut stale = self.stale_entity_ids.write().await; + for id in &summary.modified_entity_ids { + stale.insert(id.clone()); + } + // Prune entries for entities that no longer exist + stale.retain(|id| graph.entities.contains_key(id)); + } + if summary.entities_added == 0 && summary.entities_modified == 0 && summary.entities_removed == 0 @@ -529,6 +579,19 @@ impl RpgServer { ), }; + // Stale-feature drift — entities still counted as "lifted" because + // they have features, but those features are out of date because + // the source was modified after lifting. Tracked across syncs by + // auto_sync_if_stale. + let stale_features_count = { + let stale = self.stale_entity_ids.read().await; + // Filter to entities still present in the graph + stale + .iter() + .filter(|id| graph.entities.contains_key(*id)) + .count() + }; + let mut out = format!( "=== RPG Lifting Status ===\n\ {}\n\ @@ -536,6 +599,12 @@ impl RpgServer { hierarchy: {}\n", graph_line, lifted, total, coverage_pct, hierarchy_type, ); + if stale_features_count > 0 { + out.push_str(&format!( + "stale_features: {} entities modified since last lift (features outdated)\n", + stale_features_count, + )); + } // Per-area coverage let area_cov = graph.area_coverage(); @@ -615,31 +684,56 @@ impl RpgServer { // foreground lifting is not recommended. See also the matching // check in `get_entities_for_lifting` which expresses the same // heuristic in terms of batches. + // + // The state machine considers two kinds of "work remaining": + // - `remaining` — entities that have never been lifted + // - `stale_features_count` — entities with outdated features after + // a source modification (tracked across syncs via + // `stale_entity_ids`). These look "lifted" in coverage but their + // features no longer reflect the source. + // Their sum is what actually needs LLM work. out.push('\n'); let remaining = total.saturating_sub(lifted); + let work_remaining = remaining + stale_features_count; if stale_detail.is_some() { - out.push_str("NEXT STEP: Graph is stale. Call update_rpg to sync with code changes, then lift any new entities.\n"); - } else if remaining >= crate::LARGE_SCOPE_ENTITIES { + out.push_str("NEXT STEP: Graph is stale. Call update_rpg to sync with code changes, then lift any new or modified entities.\n"); + } else if work_remaining >= crate::LARGE_SCOPE_ENTITIES { // Large repo — recommend delegating the mechanical loop so the - // caller doesn't exhaust its own context. Keep NEXT STEP on one - // line; follow-up detail is in labeled blocks below. + // caller doesn't exhaust its own context. Give the dispatch + // pattern *directly* here rather than bouncing the caller through + // get_entities_for_lifting first (which would burn batch-0's + // source payload in the caller's context, the exact thing we're + // trying to avoid). // // Note: `remaining` is the raw unlifted count *before* auto-lift // runs (which happens inside get_entities_for_lifting). Auto-lift // shrinks the LLM-needed set considerably for repos with many - // trivial entities (getters, setters, constructors). The dispatch - // hint here is therefore conditional — get_entities_for_lifting - // batch 0 emits a confirming NOTE only when ≥10 LLM batches - // actually queue up. If the agent calls it and sees no NOTE, - // the work is small enough to lift directly. + // trivial entities (getters, setters, constructors). 
The agent + // can skip the dispatch if, once the worker calls + // get_entities_for_lifting batch 0, no delegation NOTE appears — + // in that case the queue is small enough to lift in one context. let batch_tokens = self.config.read().await.encoding.max_batch_tokens; + let workload_desc = if remaining > 0 && stale_features_count > 0 { + format!( + "{} unlifted + {} stale = {} entities", + remaining, stale_features_count, work_remaining, + ) + } else if stale_features_count > 0 { + format!( + "{} stale entities to re-lift (modified since last lift)", + stale_features_count, + ) + } else { + format!("{} entities unlifted", remaining) + }; out.push_str(&format!( - "NEXT STEP: Likely-large lifting workload — {} entities remain (auto-lift may reduce this). Call get_entities_for_lifting(scope=\"*\") next; if its batch-0 response includes a delegation NOTE, follow the dispatch pattern below. Each remaining batch is ~{}K tokens of source.\n", - remaining, batch_tokens.div_ceil(1000), + "NEXT STEP: Likely-large lifting workload — {} (auto-lift may reduce this). Dispatch a sub-agent to run the LOOP below; do not run it in this context — each batch is ~{}K tokens of source and will exhaust caller context over many iterations.\n", + workload_desc, + batch_tokens.div_ceil(1000), )); out.push_str( - "\nLOOP (run in the delegated context):\n \ + "\nLOOP (sub-agent runs this in its own context):\n \ get_entities_for_lifting(scope=\"*\") -> analyze batch -> submit_lift_results -> repeat until DONE -> finalize_lifting\n\ \nDISPATCH:\n \ Use whatever sub-agent / cheaper-model mechanism your runtime provides. The MCP graph persists to disk after every submit, so the worker's writes survive. **After the worker returns, call `reload_rpg`** — some runtimes give sub-agents an isolated MCP session, in which case the caller's in-memory graph is stale until reloaded. (No-op if your runtime shares the MCP session.)\n\ @@ -652,11 +746,24 @@ impl RpgServer { out.push_str( "NEXT STEP: Call get_entities_for_lifting(scope=\"*\") to start lifting.\n", ); - } else if lifted < total { + } else if remaining > 0 && stale_features_count > 0 { + out.push_str(&format!( + "NEXT STEP: {} unlifted + {} stale = {} entities need LLM work. Call get_entities_for_lifting(scope=\"*\") — it returns both unlifted entities and stale ones that need re-lifting in the same batches.\n", + remaining, stale_features_count, work_remaining, + )); + } else if remaining > 0 { out.push_str(&format!( "NEXT STEP: {} entities remaining. Call get_entities_for_lifting(scope=\"*\") to continue lifting.\n", remaining, )); + } else if stale_features_count > 0 { + // All entities have features, but some features are outdated. + // The post-sync delta is what matters here — we track modified + // entities in stale_entity_ids so agents know to re-lift them. + out.push_str(&format!( + "NEXT STEP: Coverage is 100% but {} entities have stale features (source modified after lift). Call get_entities_for_lifting(scope=\"*\") to re-lift just those — it surfaces stale entities as if they were unlifted.\n", + stale_features_count, + )); } else if !graph.metadata.semantic_hierarchy { out.push_str( "NEXT STEP: All entities lifted. 
Call finalize_lifting, then get_files_for_synthesis + submit_file_syntheses for holistic file-level features, then build_semantic_hierarchy + submit_hierarchy.\n", diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index 0a029fa..efe9764 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -3,6 +3,8 @@ //! The `#[tool_router]` proc macro requires every `#[tool]` method to live in one //! `impl` block, so this file cannot be split further without upstream changes. +use std::collections::HashSet; + use rmcp::{handler::server::wrapper::Parameters, tool, tool_router}; use rpg_core::graph::{RPGraph, normalize_path}; use rpg_core::storage; @@ -445,9 +447,9 @@ impl RpgServer { // Reload the config from the new project — encoding.max_batch_tokens // and other project-scoped settings must follow the root or tools - // will operate with the previous project's configuration. - *self.config.write().await = - rpg_core::config::RpgConfig::load(&canonical).unwrap_or_default(); + // will operate with the previous project's configuration. Logs a + // warning if the new project has a malformed .rpg/config.toml. + Self::reload_config_with_warning(&self.config, &canonical).await; // Reset all session + sync state — everything is project-scoped *self.lifting_session.write().await = None; @@ -458,6 +460,7 @@ impl RpgServer { *self.last_auto_sync_head.write().await = None; *self.last_auto_sync_changeset.write().await = None; *self.last_auto_sync_workdir_paths.write().await = std::collections::HashSet::new(); + *self.stale_entity_ids.write().await = std::collections::HashSet::new(); #[cfg(feature = "embeddings")] { *self.embedding_index.write().await = None; @@ -1011,12 +1014,49 @@ impl RpgServer { }; if needs_rebuild { + // Snapshot stale entity IDs *before* taking graph/session locks so + // we don't deadlock on nested writes. Stale entities are ones + // whose source changed after they were lifted — their features + // still exist but are outdated, so they should be treated as + // "needs LLM work" alongside unlifted entities. + let stale_snapshot: HashSet = { + let stale = self.stale_entity_ids.read().await; + stale.iter().cloned().collect() + }; + // Lock order: graph first, then session (consistent with lifting_status) let mut guard = self.graph.write().await; let mut session = self.lifting_session.write().await; let graph = guard.as_mut().ok_or("No RPG loaded")?; - let scope = rpg_encoder::lift::resolve_scope(graph, ¶ms.scope); + let mut scope = rpg_encoder::lift::resolve_scope(graph, ¶ms.scope); + + // For the "*"/"all" scope, `resolve_scope` filters to entities + // with *no* features — which correctly captures unlifted + // entities but excludes stale ones (they still have their old + // features). Augment the scope with any tracked stale + // entities so a single get_entities_for_lifting(scope="*") + // call covers both "never lifted" and "lifted-but-outdated". + // For other scope kinds (glob, hierarchy path, id list), + // `resolve_scope` doesn't filter by lifted state, so any + // stale entity matching the scope is already present. 
+ let params_scope_trimmed = params.scope.trim(); + if params_scope_trimmed == "*" || params_scope_trimmed.eq_ignore_ascii_case("all") { + let already: HashSet<&String> = scope.entity_ids.iter().collect(); + let to_add: Vec = stale_snapshot + .iter() + .filter(|id| !already.contains(id)) + .filter(|id| { + graph + .entities + .get(*id) + .is_some_and(|e| e.kind != rpg_core::graph::EntityKind::Module) + }) + .cloned() + .collect(); + scope.entity_ids.extend(to_add); + } + if scope.entity_ids.is_empty() { *session = None; return Ok(format!( @@ -1048,11 +1088,17 @@ impl RpgServer { let mut needs_llm = Vec::new(); let mut review_candidates: Vec<(String, Vec)> = Vec::new(); for raw in all_raw_entities { - // Skip entities that already have curated features - let already_lifted = graph - .entities - .get(&raw.id()) - .is_some_and(|e| !e.semantic_features.is_empty()); + let raw_id = raw.id(); + // Stale entities get re-lifted regardless of existing + // features — their features are known-outdated because + // the source was modified after the previous lift. + let is_stale = stale_snapshot.contains(&raw_id); + // Otherwise, skip entities that already have curated features. + let already_lifted = !is_stale + && graph + .entities + .get(&raw_id) + .is_some_and(|e| !e.semantic_features.is_empty()); if already_lifted { continue; } @@ -1435,6 +1481,15 @@ impl RpgServer { graph.refresh_metadata(); + // Re-lifted entities are no longer stale — drain them from the set + // tracked by auto-sync so lifting_status reports accurate drift. + if !resolved_features.is_empty() { + let mut stale = self.stale_entity_ids.write().await; + for id in resolved_features.keys() { + stale.remove(id); + } + } + storage::save(&self.project_root().await, graph) .map_err(|e| format!("Failed to save RPG: {}", e))?; @@ -2037,11 +2092,15 @@ impl RpgServer { )] async fn reload_rpg(&self) -> Result { let project_root = self.project_root().await; - // Refresh config from disk too — if the user edited .rpg/config.toml - // or the lifter wrote new settings, pick them up here so subsequent - // tool calls operate against current values. - *self.config.write().await = - rpg_core::config::RpgConfig::load(&project_root).unwrap_or_default(); + // Refresh config from disk — if the user edited .rpg/config.toml + // or the lifter wrote new settings, pick them up here. Logs a + // warning if the file exists but failed to parse, then keeps the + // existing config (don't clobber a working in-memory config over + // a temporarily broken edit). + Self::reload_config_with_warning(&self.config, &project_root).await; + // Reset drift tracking — the on-disk graph might already reflect + // submissions from an external CLI run. + *self.stale_entity_ids.write().await = std::collections::HashSet::new(); match storage::load(&project_root) { Ok(g) => { let entities = g.metadata.total_entities; From 5f755755774b6edc41a761625d3452f4ad43791b Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 16:16:42 +0200 Subject: [PATCH 10/19] fix: drain stale_entity_ids when auto-lift re-writes a stale entity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to round 5 stale-tracking work. The auto-lift path inside get_entities_for_lifting writes features directly to the graph and persists them there — it never round-trips through submit_lift_results, so the drain logic added in round 5 never fired for auto-lifted stale entities. 
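For orientation, a minimal self-contained sketch of the two drain points — illustrative only; the struct, method names, and entity IDs below are simplified stand-ins, not the server's actual types (the real code keeps `stale_entity_ids` behind an `Arc<RwLock<...>>` and drains it inside `submit_lift_results` and `get_entities_for_lifting`):

```
use std::collections::HashSet;

// Simplified model of the stale-feature tracking set and the two code
// paths that are expected to drain it.
struct StaleTracking {
    stale_entity_ids: HashSet<String>,
}

impl StaleTracking {
    // LLM path: submit_lift_results persists the submitted features and
    // then removes those IDs from the stale set (the round-5 behavior
    // that already worked).
    fn after_submit_lift_results(&mut self, submitted: &[String]) {
        for id in submitted {
            self.stale_entity_ids.remove(id);
        }
    }

    // Auto-lift path: get_entities_for_lifting writes features directly
    // and never reaches submit_lift_results, so it has to drain the IDs
    // itself — the step this commit adds.
    fn after_auto_lift(&mut self, auto_relifted_stale: &[String]) {
        for id in auto_relifted_stale {
            self.stale_entity_ids.remove(id);
        }
    }
}

fn main() {
    let mut tracking = StaleTracking {
        stale_entity_ids: ["core.rs:parse".to_string(), "core.rs:render".to_string()]
            .into_iter()
            .collect(),
    };
    // One entity re-lifted by the LLM loop, one by the auto-lifter.
    tracking.after_submit_lift_results(&["core.rs:parse".to_string()]);
    tracking.after_auto_lift(&["core.rs:render".to_string()]);
    // Both paths drained, so lifting_status no longer reports stale drift.
    assert!(tracking.stale_entity_ids.is_empty());
}
```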
Effect: a stale entity that matched a high-confidence auto-lift pattern would get correctly re-lifted (fresh features on disk) but stay pinned in stale_entity_ids forever, inflating the "stale_features" count in lifting_status. Fix: track auto-relifted stale IDs in a local vec as each raw entity is processed, and drain them from stale_entity_ids right after the graph save. Mirrors the drain that submit_lift_results already does for the LLM path. Also canonicalizes a small pre-existing inconsistency: `raw.id()` was being called twice (once for the skip check, once inside match arms) — hoisted to `raw_id` to match the new stale lookup and save a clone. Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/rpg-mcp/src/tools.rs | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index efe9764..c239107 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -1087,6 +1087,13 @@ impl RpgServer { let mut auto_lifted = 0usize; let mut needs_llm = Vec::new(); let mut review_candidates: Vec<(String, Vec)> = Vec::new(); + // Track which stale entities got re-lifted (either via auto-lift + // above or routed into needs_llm below) so we can drain them + // from `stale_entity_ids` once they're written. Auto-lift + // persists its features directly inside this function, so it + // must drain the set itself; needs_llm entities are drained + // in `submit_lift_results` after the caller submits features. + let mut auto_relifted_stale: Vec = Vec::new(); for raw in all_raw_entities { let raw_id = raw.id(); // Stale entities get re-lifted regardless of existing @@ -1105,21 +1112,26 @@ impl RpgServer { match engine.try_lift_with_confidence(&raw) { Some((features, rpg_encoder::lift::LiftConfidence::Accept)) => { // High confidence — apply features directly - if let Some(entity) = graph.entities.get_mut(&raw.id()) { + if let Some(entity) = graph.entities.get_mut(&raw_id) { entity.semantic_features = features; entity.feature_source = Some("auto".to_string()); auto_lifted += 1; + if is_stale { + auto_relifted_stale.push(raw_id.clone()); + } } } Some((features, rpg_encoder::lift::LiftConfidence::Review)) => { // Medium confidence — apply features but flag for review - let eid = raw.id(); - if let Some(entity) = graph.entities.get_mut(&eid) { + if let Some(entity) = graph.entities.get_mut(&raw_id) { entity.semantic_features = features.clone(); entity.feature_source = Some("auto".to_string()); auto_lifted += 1; + if is_stale { + auto_relifted_stale.push(raw_id.clone()); + } } - review_candidates.push((eid, features)); + review_candidates.push((raw_id, features)); } Some((_, rpg_encoder::lift::LiftConfidence::Reject)) | None => { needs_llm.push(raw); @@ -1135,6 +1147,17 @@ impl RpgServer { } } + // Drain stale tracking for entities auto-lifter just wrote + // fresh features for. Without this, lifting_status would keep + // counting them as stale forever because the auto-lift path + // skips submit_lift_results entirely. 
+ if !auto_relifted_stale.is_empty() { + let mut stale = self.stale_entity_ids.write().await; + for id in &auto_relifted_stale { + stale.remove(id); + } + } + if needs_llm.is_empty() { *session = None; let (lifted, total) = graph.lifting_coverage(); From 295de14d9d75b95a8de02f96cdd056b54aad5961 Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 16:33:44 +0200 Subject: [PATCH 11/19] fix: stale-tracking coverage gaps + CHANGELOG format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code fixes: - update_rpg now feeds summary.modified_entity_ids into stale_entity_ids so its "needs_relift: N" reply aligns with what lifting_status and get_entities_for_lifting(scope="*") report. Previously those values were reported but the set was never populated from the manual update path — only auto_sync_if_stale touched it. - submit_lift_results NEXT/DONE logic now counts unlifted + stale remaining. A stale-only re-lift loop (coverage already 100%) would otherwise see "DONE" after batch 1 while later batches were still queued. - reload_rpg now clears stale_entity_ids only on the success path after storage::load returns Ok. A transient read error no longer erases the drift backlog while leaving the previous graph in memory. - CLI fallback guidance ("rpg-encoder lift --provider ...") is gated to cases with actual unlifted entities. The CLI's resolve_scope filters to entities with no features, so a stale-only backlog is a no-op for the CLI — surfacing it there was a dead-end recipe. Added a note explaining the limitation in server.rs, server_instructions.md, and README. CHANGELOG: - Collapsed v0.8.3 into the three standard Keep a Changelog buckets (Added, Changed, Fixed). Dropped subjective/process-revealing subheadings like "Fixed (Codex round 5 review)" and "Tool-preference guidance (RPG first, grep second)" — those expose the internal review cycle rather than describing what users see. Verified with cargo fmt --all, cargo clippy --workspace --all-targets -- -D warnings, cargo test --workspace --lib, cargo test -p rpg-mcp. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 273 +++++++----------- README.md | 2 +- .../src/prompts/server_instructions.md | 7 +- crates/rpg-mcp/src/server.rs | 15 +- crates/rpg-mcp/src/tools.rs | 59 +++- 5 files changed, 173 insertions(+), 183 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 604b8c5..47d3323 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,173 +7,118 @@ and this project adheres to [Semantic Versioning](https://semver.org/). ## [0.8.3] - 2026-04-14 -### Fixed (Codex round 5 review) - -- **`lifting_status` could not see stale-feature drift.** Coverage reached - 100% after sync but entities with modified sources still had outdated - features; the dashboard reported "Graph is complete" and sent callers off - to search. Added a persistent `stale_entity_ids: Arc>` on - `RpgServer` populated from `summary.modified_entity_ids` on every - `auto_sync_if_stale`, drained by `submit_lift_results` as entities get - re-lifted, and cleared on `reload_rpg` / `set_project_root`. Surfaced in - the header as `stale_features: N entities modified since last lift` and - factored into the NEXT STEP state machine. 
-- **`get_entities_for_lifting(scope="*")` silently skipped stale entities.** - `resolve_scope` filters to entities with no features, and the auto-lift - skip check short-circuits anything that already has features — so a - caller following the "re-lift stale entities" recipe would get back zero - entities even though the dashboard said N needed work. Now the server - snapshots `stale_entity_ids` before taking graph locks, augments the - resolved scope with stale entities when scope is `*`/`all`, and routes - stale entities into `needs_llm` regardless of existing features. -- **Circular delegation flow from `lifting_status`.** The large-scope NEXT - STEP sent callers to `get_entities_for_lifting(scope="*")` first, whose - batch-0 response then said "dispatch a sub-agent" — burning the caller's - context on a batch payload before they ever saw the dispatch - recommendation. Now `lifting_status` emits the LOOP/DISPATCH/FALLBACK - blocks directly, so the caller delegates without first loading a batch of - source. -- **NEXT STEP only branched on `remaining` (unlifted count).** When - `total - lifted == 0` but `stale_features_count > 0`, the old logic - skipped straight to hierarchy or "graph is complete" and never prompted - re-lift. The state machine now considers `remaining + stale_features` - together — dedicated branches cover "unlifted + stale mixed", "stale - only", and the large-scope threshold compares against the combined total. -- **Silent config load swallow.** `set_project_root` and `reload_rpg` used - `unwrap_or_default()` on config loads, collapsing "missing config file" - (expected) and "malformed TOML" (user error) into identical behavior. Added - `reload_config_with_warning` helper that logs a stderr warning when parse - fails so misconfigurations surface instead of silently reverting to defaults. - -### Fixed (Codex round 4 review) - -- **Auto-sync notice mislabelled the unlifted count.** `total - lifted` - is the global backlog, not the per-update delta. Saying "N new entities - unlifted" against that number meant a one-line edit on a partially-lifted - repo could claim "50 new entities unlifted" when only 1 was actually new. - Now reports the per-update delta separately (`+N added unlifted, ~M stale`) - and notes any pre-existing backlog as `(+P pre-existing)`. -- **`finalize_lifting` guidance was wrong for scoped fallback.** The - no-dispatch fallback said to call `finalize_lifting` after each scoped - subtree. But `finalize_lifting` auto-routes pending entities and locks - the hierarchy — calling it mid-flow against incomplete signals would - bake in bad routing decisions. Corrected to "call finalize_lifting ONCE - at the very end after all scopes complete" across server.rs and - server_instructions.md. -- **Heuristic divergence between `LARGE_SCOPE_ENTITIES` and - `LARGE_SCOPE_BATCHES` is now documented explicitly.** Both constants - carry doc comments explaining that the batch-0 NOTE in - `get_entities_for_lifting` is the authoritative dispatch decision; the - dashboard `lifting_status` is a heuristic gate that defers to the NOTE - when they disagree. - -### Added (drift maintenance — re-lift after code changes) - -- **Auto-sync notice now actively recommends re-lift** when entities have - drifted. Previous notice was informational (`"; N need lifting"`) and - agents treated it as such — they read the count and moved on. 
New notice - separates new-entity drift from stale-feature drift and includes a verb - ("call lifting_status to refresh" or, at large scale, "...for re-lift - dispatch"). The change targets the specific failure mode where an agent - commits new code, sees the drift count, and continues to the next user - request without re-lifting — leaving semantic search incomplete for - downstream sessions. -- **New "DRIFT MAINTENANCE" section in `server_instructions.md`** explains - the three notice variants (`new entities unlifted`, `stale features`, - both) and frames re-lift as part of "definition of done" for any task - that wrote code — the same way running tests is. -- **`submit_lift_results` NEXT action is now scale-aware**: when the - remaining count after a batch ≥ `LARGE_SCOPE_ENTITIES`, it points the - caller back at `lifting_status` for the dispatch recommendation rather - than encouraging another foreground batch. - -### Fixed (Codex round 2 review) - -- **CLI fallback (`rpg-encoder lift --provider ...`) leaves the MCP server - on a stale in-memory graph.** All four surfaces that mention the CLI - fallback (server.rs lifting_status block, server_instructions.md, - README, .gemini commands) now explicitly say "after the CLI finishes, - call `reload_rpg` in this session" so the server picks up the new - `.rpg/graph.json` from disk. -- **`set_project_root` failed to refresh `self.config`.** When switching - to a project with a different `.rpg/config.toml`, the server kept - serving the previous project's `encoding.max_batch_tokens` and other - settings. Now reloads `RpgConfig` from the new root atomically with the - root swap. -- **`lifting_status` large-scope NEXT STEP could promise delegation work - the auto-lifter would actually finish locally.** The check uses raw - `remaining` (pre-auto-lift), so on repos with many trivial entities - (getters, setters, constructors) the dashboard could recommend a worker - for ~0 LLM calls. Reworded to "likely-large workload — call - get_entities_for_lifting next; if its batch-0 response includes the - delegation NOTE, follow the dispatch pattern below". The batch-0 NOTE - is the authoritative signal because it sees the post-auto-lift queue. -- **"`rpg_info` says 'No RPG found'" was wrong.** `rpg_info` returns a - tool error, not a friendly status string. Changed to "any RPG tool - returns 'No RPG found'". +### Added + +- `lifting_status` tracks stale-feature drift across calls. A persistent + per-server set records entities whose source was modified after they + were lifted; the dashboard reports `stale_features: N entities modified + since last lift` and the NEXT STEP state machine prompts re-lift even + when coverage is 100%. +- `get_entities_for_lifting(scope="*")` now returns stale entities + alongside unlifted ones, so a single call covers both "never lifted" + and "lifted-but-outdated" work. Stale entities bypass the auto-lift + skip check and flow through the normal LLM batch loop. +- `lifting_status` emits a sub-agent dispatch recommendation when the + remaining work (unlifted + stale) is ≥100 entities. The response + contains `LOOP` / `DISPATCH` / `FALLBACK` blocks so callers delegate + directly without first loading a batch of source into their own context. +- `get_entities_for_lifting` batch-0 emits a one-line dispatch hint when + ≥10 token-aware batches are queued, pointing back to `lifting_status` + for the full recommendation. 
+- `submit_lift_results` NEXT action becomes scale-aware — when remaining + work ≥100 entities, it redirects the caller to `lifting_status` instead + of encouraging another foreground batch. +- Auto-sync notice now prescribes a verb. It distinguishes per-update + delta from pre-existing backlog and separates new-entity drift from + stale-feature drift, so an agent that commits code and sees the notice + is told to re-lift rather than informed of a count. +- `server_instructions.md`: new `USE RPG FIRST` top section with a + mapping table from shell patterns (`grep -r`, `cat`, `find`, `wc -l`, + chained greps) to the RPG tool that replaces them. +- `server_instructions.md`: new `DRIFT MAINTENANCE` section explaining + the three auto-sync notice variants and framing re-lift as part of + definition-of-done for any task that wrote code. +- Tool descriptions for `search_node`, `fetch_node`, `explore_rpg`, + `rpg_info`, `semantic_snapshot`, `context_pack`, `impact_radius`, + `plan_change`, `analyze_health`, `detect_cycles`, `find_paths`, and + `slice_between` now open with a `PREFER THIS OVER ...` marker naming + the shell command or workflow they replace. +- `.claude/skills/rpg/SKILL.md` and `README.md` carry the same RPG-first + mapping table as the server prompt. +- `reload_config_with_warning` helper (`server.rs`) distinguishes missing + `.rpg/config.toml` (silent default) from malformed TOML (stderr warning, + keeps previous in-memory config). +- Crate-visible `LARGE_SCOPE_ENTITIES` (100) and `LARGE_SCOPE_BATCHES` (10) + constants replace duplicated magic numbers. Doc comments describe the + heuristic-vs-authoritative relationship. ### Changed -- **`lifting_status` NEXT STEP now recommends sub-agent dispatch when the - remaining entity count is ≥`LARGE_SCOPE_ENTITIES` (100).** Previously the - guidance was to call `get_entities_for_lifting(scope="*")` directly in - the foreground regardless of size, which silently consumed the caller's - context window — on a 1500-entity repo, ~150 batches of source code - before any real user work could begin. -- **`NEXT STEP:` is still a single parseable line.** Dispatch detail is - emitted in labeled blocks (`LOOP:`, `DISPATCH:`, `FALLBACK:`) immediately - below, so existing line-based consumers continue to work. -- **Batch-size estimates are read from the live `max_batch_tokens` config** - (default 8000) instead of the previous hard-coded `~12K` figure. The - total-token estimate scales correctly when the user overrides the budget. -- **Guidance is runtime-neutral.** No specific runtime's dispatch syntax - appears in the core response — callers use whatever sub-agent or - cheaper-model mechanism their runtime exposes. Two fallbacks are listed - explicitly: scoped lifting (`get_entities_for_lifting(scope="src/area/**")`) - for callers with no delegation mechanism and no API key, and - `rpg-encoder lift --provider anthropic|openai` for callers with an API key - and no sub-agent support. -- **`get_entities_for_lifting` batch-0 response** includes a one-line - dispatch hint when ≥`LARGE_SCOPE_BATCHES` (10) batches are queued, - directing the caller back to `lifting_status` for the full delegation - recommendation. Kept deliberately short so it doesn't duplicate detail. -- `server_instructions.md` LIFTING FLOW sub-section rewritten and shortened - (net ≤30 tokens over pre-PR baseline for that sub-section). Note: the - separate "USE RPG FIRST" section below adds ~500 tokens to the handshake - prompt — the intended intervention. 
-- New workspace-visible constants `LARGE_SCOPE_ENTITIES` and - `LARGE_SCOPE_BATCHES` in `rpg-mcp` replace duplicated magic numbers - across server.rs and tools.rs. - -### Documentation - -- README delegation guidance: added a short paragraph explaining the - large-repo path (sub-agent dispatch or CLI autonomous lift) so users - aren't surprised when `lifting_status` starts returning delegation - recommendations instead of direct lift instructions. - -### Tool-preference guidance (RPG first, grep second) - -Agents consistently fall back to `grep`/`cat`/`Read` for codebase questions -even when RPG is installed, because the server's prompt and tool descriptions -didn't actively counter the training default. - -- **`server_instructions.md`**: new top section "USE RPG FIRST — BEFORE - grep / cat / Read / find" with a concrete mapping table listing each - shell pattern and the RPG tool that replaces it. Placed before the - LIFTING FLOW so the directive is read before any workflow detail. -- **Tool descriptions**: each of `search_node`, `fetch_node`, `explore_rpg`, - `rpg_info`, `semantic_snapshot`, `context_pack`, `impact_radius`, - `plan_change`, `analyze_health`, `detect_cycles`, `find_paths`, - `slice_between` now opens with a "PREFER THIS OVER ..." marker naming - the shell command or workflow it replaces. Description tokens are - what agents weigh when choosing a tool — making the displacement - explicit is the intervention that sticks. -- **`.claude/skills/rpg/SKILL.md`**: same mapping table scoped to the - CLI surface, plus a note pointing to MCP tools when available. -- **README**: dedicated "Use RPG before grep/cat/find" section with - the same mapping table as the server prompt — so human readers see - the same positioning the agent does. +- `lifting_status` NEXT STEP is runtime-neutral. No specific runtime's + dispatch syntax appears in the response; callers use whatever sub-agent + or cheaper-model mechanism their runtime exposes. Explicit fallbacks: + scoped lifting for callers with no delegation mechanism, and + `rpg-encoder lift --provider anthropic|openai` for callers with an API + key and no sub-agent support. +- Batch-size estimates in NEXT STEP messages read from the live + `encoding.max_batch_tokens` config instead of a hard-coded `~12K` + figure, so the estimate scales when the user overrides the budget. +- `NEXT STEP:` remains a single parseable line; dispatch detail is + emitted in labeled blocks immediately below. +- `server_instructions.md` LIFTING FLOW sub-section rewritten and + shortened. +- `update_rpg` now feeds `summary.modified_entity_ids` into the + stale-tracking set so its `needs_relift: N` reply aligns with what + `lifting_status` and `get_entities_for_lifting(scope="*")` report. +- `reload_rpg` clears the stale-tracking set only after the graph reload + succeeds, so a transient read error no longer wipes the drift backlog + while leaving the previous graph in memory. +- `set_project_root` resets the stale-tracking set as part of the + project-scoped state reset. +- CLI fallback in large-scope guidance is gated to cases with actual + unlifted work, with a note that `rpg-encoder lift` does not re-lift + stale entities (it filters to entities with no features). + +### Fixed + +- `lifting_status` previously reported `Graph is complete` as soon as + every entity had some features, ignoring stale features from modified + sources. The state machine now considers `remaining + stale_features` + combined. 
+- `get_entities_for_lifting(scope="*")` previously returned zero entities + when all drift was stale (features present, sources modified) because + `resolve_scope` filters to entities with no features. +- Auto-sync notice previously conflated per-update delta with global + backlog, so a one-line edit on a partially-lifted repo could claim + "50 new entities unlifted" when only 1 was actually new. +- `finalize_lifting` fallback guidance previously said to call it after + each scoped subtree. That auto-routes pending entities against + incomplete signals and locks the hierarchy early. Guidance now says + to call `finalize_lifting` once after all scopes complete. +- `rpg-encoder lift --provider ...` (CLI fallback) left the MCP server + on a stale in-memory graph. All docs that mention the CLI fallback + now specify that the caller must call `reload_rpg` afterward. +- `set_project_root` and `reload_rpg` previously used + `unwrap_or_default()` on config loads, collapsing missing-config and + malformed-TOML into identical silent behavior. Malformed TOML now + surfaces a stderr warning and leaves the in-memory config untouched. +- `set_project_root` failed to refresh `self.config` on project switch; + the server kept serving the previous project's encoding settings. +- `lifting_status` large-scope recommendation previously ran off raw + unlifted count, before auto-lift had reduced the set. On repos full + of trivial entities (getters, setters, constructors) it could + recommend delegation for ~0 LLM calls. The large-scope branch now + hedges — it signals likely-large and defers the authoritative check + to the post-auto-lift batch count in `get_entities_for_lifting`. +- `rpg_info` error wording ("No RPG found") was miscited as a friendly + status string; corrected to "any RPG tool returns 'No RPG found'". +- `submit_lift_results` previously emitted `DONE` as soon as coverage + reached 100%, which could terminate a stale-only re-lift loop after + batch 1 while later batches were still queued. The NEXT/DONE branch + now counts unlifted + stale remaining. +- Auto-lifted features for entities that were previously flagged stale + now drain the stale-tracking set, so the count doesn't inflate when + the auto-lifter writes fresh features directly. ## [0.8.2] - 2026-04-14 diff --git a/README.md b/README.md index ce72302..4823aa3 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Then open any repo and tell your agent: Your agent handles everything: indexes entities (seconds), reads each function and adds intent-level features (a few minutes), organizes them into a semantic hierarchy, and commits `.rpg/graph.json` for your team. -For repos with ~100+ entities, `lifting_status` will tell your agent to delegate the lifting loop to a sub-agent or a cheaper model — feature extraction is pattern-matching, not novel reasoning. If your runtime has no sub-agent mechanism, run `rpg-encoder lift --provider anthropic|openai` from the terminal with an API key — the CLI drives an external LLM directly with no agent involvement. After the CLI finishes, call `reload_rpg` in your session to load the updated graph. +For repos with ~100+ entities, `lifting_status` will tell your agent to delegate the lifting loop to a sub-agent or a cheaper model — feature extraction is pattern-matching, not novel reasoning. If your runtime has no sub-agent mechanism, run `rpg-encoder lift --provider anthropic|openai` from the terminal with an API key — the CLI drives an external LLM directly with no agent involvement. 
After the CLI finishes, call `reload_rpg` in your session to load the updated graph. The CLI lifts entities with no features; re-lifting stale entities (features present but outdated after code changes) is handled by the in-session MCP flow, not the CLI. Once lifted, try: diff --git a/crates/rpg-mcp/src/prompts/server_instructions.md b/crates/rpg-mcp/src/prompts/server_instructions.md index 8e66cc4..4610b25 100644 --- a/crates/rpg-mcp/src/prompts/server_instructions.md +++ b/crates/rpg-mcp/src/prompts/server_instructions.md @@ -166,10 +166,13 @@ Fallbacks when no delegation mechanism is available: `finalize_lifting` ONCE after all scopes are complete — calling it mid-flow auto-routes pending entities against incomplete signals and locks the hierarchy in early. -- **CLI autonomous lift**: `rpg-encoder lift --provider anthropic|openai` uses an - external API key directly — no agent subscription involvement. **After the CLI +- **CLI autonomous lift** (unlifted entities only): `rpg-encoder lift --provider anthropic|openai` + uses an external API key directly — no agent subscription involvement. **After the CLI finishes, call `reload_rpg` in this session** so the server picks up the updated `.rpg/graph.json` — otherwise subsequent queries will still see the pre-lift state. + Note: the CLI lifts entities with no features; it does not re-lift stale entities + (features present but outdated after source edits). For stale-entity re-lifting use + the MCP loop above (sub-agent dispatch or foreground scoped lifting). After delegation returns, call `get_files_for_synthesis` + `submit_file_syntheses`, then `build_semantic_hierarchy` + `submit_hierarchy`. diff --git a/crates/rpg-mcp/src/server.rs b/crates/rpg-mcp/src/server.rs index 4739e28..4067180 100644 --- a/crates/rpg-mcp/src/server.rs +++ b/crates/rpg-mcp/src/server.rs @@ -738,10 +738,19 @@ impl RpgServer { \nDISPATCH:\n \ Use whatever sub-agent / cheaper-model mechanism your runtime provides. The MCP graph persists to disk after every submit, so the worker's writes survive. **After the worker returns, call `reload_rpg`** — some runtimes give sub-agents an isolated MCP session, in which case the caller's in-memory graph is stale until reloaded. (No-op if your runtime shares the MCP session.)\n\ \nFALLBACK (no sub-agent mechanism, no API key):\n \ - Scope the lift to one subtree at a time — e.g. get_entities_for_lifting(scope=\"src/auth/**\") — and submit features per batch within that scope. Each scoped batch fits in foreground context. Call finalize_lifting ONCE at the very end after all scopes are complete; calling it mid-flow auto-routes pending entities against incomplete signals and locks the hierarchy in early.\n\ - \nFALLBACK (no sub-agent mechanism, API key available):\n \ - Run `rpg-encoder lift --provider anthropic` (or `openai`) from the terminal — the CLI drives an external LLM directly with no agent involvement. After the CLI finishes, call `reload_rpg` in this session so the server picks up the updated graph from disk.\n", + Scope the lift to one subtree at a time — e.g. get_entities_for_lifting(scope=\"src/auth/**\") — and submit features per batch within that scope. Each scoped batch fits in foreground context. Call finalize_lifting ONCE at the very end after all scopes are complete; calling it mid-flow auto-routes pending entities against incomplete signals and locks the hierarchy in early.\n", ); + // The CLI fallback only helps when there is unlifted work. 
+ // `rpg-encoder lift` resolves `scope="*"` to entities with no + // features, so a stale-only backlog (features present, sources + // modified) is a no-op for the CLI — surfacing it there would + // be a dead-end recipe. + if remaining > 0 { + out.push_str( + "\nFALLBACK (no sub-agent mechanism, API key available, unlifted entities only):\n \ + Run `rpg-encoder lift --provider anthropic` (or `openai`) from the terminal — the CLI drives an external LLM directly with no agent involvement. After the CLI finishes, call `reload_rpg` in this session so the server picks up the updated graph from disk. Note: the CLI lifts entities with no features; stale entities (features present but outdated) must be re-lifted via the MCP loop above.\n", + ); + } } else if lifted == 0 { out.push_str( "NEXT STEP: Call get_entities_for_lifting(scope=\"*\") to start lifting.\n", diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index c239107..8531003 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -1625,18 +1625,36 @@ impl RpgServer { // NEXT action — scale-aware so the caller doesn't burn its context // grinding through batches when delegation would cost zero of its // tokens. Mirrors the threshold used in lifting_status. - if lifted < total { - let remaining = total - lifted; - if remaining >= crate::LARGE_SCOPE_ENTITIES { - result.push_str(&format!( - "\nNEXT: {} entities still unlifted — call lifting_status for the recommended re-lift dispatch (likely a sub-agent / cheaper model in your runtime). Continue here only if no dispatch mechanism is available.", - remaining, - )); + // + // Work is "unlifted + stale" — stale entities still show as lifted + // in coverage but need re-lifting. Emitting DONE on `lifted == total` + // alone would tell a stale-only re-lift loop to stop after batch 1 + // while later batches are still queued. + let stale_remaining = { + let stale = self.stale_entity_ids.read().await; + stale + .iter() + .filter(|id| graph.entities.contains_key(*id)) + .count() + }; + let unlifted = total.saturating_sub(lifted); + let work_remaining = unlifted + stale_remaining; + if work_remaining == 0 { + result.push_str("\nDONE: all entities lifted. Call finalize_lifting to build the semantic hierarchy."); + } else if work_remaining >= crate::LARGE_SCOPE_ENTITIES { + let breakdown = if unlifted > 0 && stale_remaining > 0 { + format!("{} unlifted + {} stale", unlifted, stale_remaining) + } else if stale_remaining > 0 { + format!("{} stale", stale_remaining) } else { - result.push_str("\nNEXT: continue with get_entities_for_lifting, then call finalize_lifting when done."); - } + format!("{} unlifted", unlifted) + }; + result.push_str(&format!( + "\nNEXT: {} entities still need LLM work ({}) — call lifting_status for the recommended re-lift dispatch (likely a sub-agent / cheaper model in your runtime). Continue here only if no dispatch mechanism is available.", + work_remaining, breakdown, + )); } else { - result.push_str("\nDONE: all entities lifted. Call finalize_lifting to build the semantic hierarchy."); + result.push_str("\nNEXT: continue with get_entities_for_lifting, then call finalize_lifting when done."); } Ok(result) } @@ -2000,6 +2018,18 @@ impl RpgServer { rpg_encoder::evolution::get_head_sha(&self.project_root().await).ok(); *self.last_auto_sync_changeset.write().await = None; + // Track modified entities so lifting_status and + // get_entities_for_lifting(scope="*") surface them as re-lift work. 
+ // Without this, the "needs_relift: N" value we report below would + // point the caller at a path that returns zero entities. + { + let mut stale = self.stale_entity_ids.write().await; + for id in &summary.modified_entity_ids { + stale.insert(id.clone()); + } + stale.retain(|id| g.entities.contains_key(id)); + } + // Clear sessions — entity list changed *self.lifting_session.write().await = None; *self.hierarchy_session.write().await = None; @@ -2121,12 +2151,15 @@ impl RpgServer { // existing config (don't clobber a working in-memory config over // a temporarily broken edit). Self::reload_config_with_warning(&self.config, &project_root).await; - // Reset drift tracking — the on-disk graph might already reflect - // submissions from an external CLI run. - *self.stale_entity_ids.write().await = std::collections::HashSet::new(); match storage::load(&project_root) { Ok(g) => { let entities = g.metadata.total_entities; + // Reset drift tracking only on success — the on-disk graph + // might already reflect submissions from an external CLI + // run, so the in-memory stale set no longer applies. On + // failure we keep the previous graph *and* its drift backlog + // so a transient read error doesn't silently erase state. + *self.stale_entity_ids.write().await = std::collections::HashSet::new(); *self.graph.write().await = Some(g); // Sync embedding index incrementally #[cfg(feature = "embeddings")] From 0cc43c310637e40ed0747eada6da413149fc3c39 Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 17:37:25 +0200 Subject: [PATCH 12/19] feat: nudge lifting on build_rpg + doc lock order + cache-friendly preamble MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit build_rpg: The tail of the response is now a directive NEXT STEP — "lift now, don't wait for user to ask", with scale-aware branching: small scope lifts inline, large scope dispatches a sub-agent with the LOOP pattern inline. Previously the response ended with a passive "Tip: use get_entities_for_lifting" line that agents routinely skipped, which is why the common flow in practice was "user builds RPG, asks a semantic question, gets poor results, manually asks to lift". Lock order: Added a struct-level doc comment on `RpgServer` declaring the canonical lock order (graph → session → stale → pending → auto-sync → config → embedding → project_root). Every nested-lock call site already respects this order — documenting it here so future reviewers (and Codex) don't have to re-derive it across a dozen files. Paths that acquire inner locks one at a time with release-between (the statement-per-lock pattern in set_project_root and reload_rpg) don't hold two locks simultaneously and so cannot form a cycle regardless of order. Cache-friendly preamble: `get_routing_candidates` no longer prints the graph revision hash in its header. Putting a content-hash on line 1 of the response means the cache fingerprint changes on every graph update, invalidating the stable instructions + entity table that follow. The revision moved to the NEXT_ACTION block at the bottom where it's read back for the submit_routing_decisions call — same data, stable prefix. Verified with cargo fmt --all, cargo clippy --workspace --all-targets -- -D warnings, cargo test -p rpg-mcp. 
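As a minimal sketch of the lock-order invariant described above (field names as declared on RpgServer; a path that needs the graph, the lifting session, and the stale set nests in exactly this order):

```
// Outermost first, per the declared order: graph, then session, then stale.
let mut graph_guard = self.graph.write().await;              // 1. graph
let mut session_guard = self.lifting_session.write().await;  // 2. session
let mut stale_guard = self.stale_entity_ids.write().await;   // 3. stale set
// ...mutate under all three; the guards drop at end of scope, so no path
// ever waits on an outer lock while already holding an inner one.
```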
Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 10 +++++++++ crates/rpg-mcp/src/server.rs | 28 ++++++++++++++++++++++++ crates/rpg-mcp/src/tools.rs | 41 +++++++++++++++++++++++++++++------- 3 files changed, 71 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 47d3323..6ca21f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/). ### Added +- `build_rpg` response now emits an action-oriented NEXT STEP directing the + agent to lift the graph immediately — small scope lifts in the current + context, large scope dispatches a sub-agent. Previously a passive + "Tip: use get_entities_for_lifting" hint that agents routinely skipped. +- Canonical lock-order invariant documented on the `RpgServer` struct so + reviewers don't have to re-derive it from scattered call sites. - `lifting_status` tracks stale-feature drift across calls. A persistent per-server set records entities whose source was modified after they were lifted; the dashboard reports `stale_features: N entities modified @@ -67,6 +73,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/). emitted in labeled blocks immediately below. - `server_instructions.md` LIFTING FLOW sub-section rewritten and shortened. +- `get_routing_candidates` response header no longer includes the graph + revision hash — it moved to the NEXT_ACTION block at the bottom. Keeps + the stable preamble (instructions + entity table) cache-eligible while + still surfacing the revision where the agent needs to read it back. - `update_rpg` now feeds `summary.modified_entity_ids` into the stale-tracking set so its `needs_relift: N` reply aligns with what `lifting_status` and `get_entities_for_lifting(scope="*")` report. diff --git a/crates/rpg-mcp/src/server.rs b/crates/rpg-mcp/src/server.rs index 4067180..c2cf4dc 100644 --- a/crates/rpg-mcp/src/server.rs +++ b/crates/rpg-mcp/src/server.rs @@ -35,6 +35,34 @@ impl PromptVersions { } /// The RPG MCP server state. +/// +/// # Lock order invariant +/// +/// Several of the fields below are `Arc>`. When a code path holds +/// more than one of them at the same time, locks must be acquired in the +/// following order (outermost first): +/// +/// 1. `graph` +/// 2. `lifting_session` / `hierarchy_session` +/// 3. `stale_entity_ids` +/// 4. `pending_routing` +/// 5. `last_auto_sync_head` / `last_auto_sync_changeset` / `last_auto_sync_workdir_paths` +/// 6. `config` +/// 7. `embedding_index` +/// 8. `project_root_cell` +/// +/// Paths that touch only one lock at a time are unaffected. Paths that +/// acquire several locks but release each before acquiring the next +/// (statement-per-lock pattern in `set_project_root` and `reload_rpg`) +/// are also unaffected — at no moment do they hold two locks, so no +/// cycle can form. +/// +/// The invariant is needed because tokio's `RwLock` is not re-entrant +/// and writers block readers while waiting: two tasks that each hold +/// one inner lock and wait for the other's outer lock would deadlock. +/// Keeping `graph` as the outermost held lock everywhere ensures that +/// any two nested paths serialize through `graph` and never form a +/// cycle on the inner locks. #[derive(Clone)] pub(crate) struct RpgServer { /// Active project root. 
Mutable at runtime via the `set_project_root` tool diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index 8531003..8e444e7 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -787,15 +787,35 @@ impl RpgServer { stats.orphaned, stats.new_entities, )); - } else { - result.push_str( - "\nTip: use get_entities_for_lifting + submit_lift_results to add semantic features.", - ); } - } else { + } + + // NEXT STEP — action-oriented, scale-aware. A build_rpg response is + // usually the first RPG tool call in a session, so the agent reads + // this before asking the user what to do next. A weak "tip" gets + // ignored; a directive with sizing guidance gets followed. Semantic + // tools (search_node, context_pack, plan_change) are lossy on an + // unlifted graph — users notice this as "search doesn't find the + // thing I know is there" — so the default is "lift now". + let unlifted = meta.total_entities.saturating_sub(meta.lifted_entities); + if unlifted == 0 { result.push_str( - "\nTip: use get_entities_for_lifting + submit_lift_results to add semantic features.", + "\n\nNEXT STEP: Graph is fully lifted. Semantic tools (search_node, context_pack, plan_change, explore_rpg) are ready — prefer them over grep/cat/find for any structural question.", ); + } else if unlifted >= crate::LARGE_SCOPE_ENTITIES { + let batch_tokens = self.config.read().await.encoding.max_batch_tokens; + result.push_str(&format!( + "\n\nNEXT STEP: {} entities unlifted (of {}). Dispatch a sub-agent now to run the lift loop — don't wait for the user to ask. Each batch is ~{}K tokens of source, so running the loop here would exhaust caller context before any real work begins.\n\ + \nLOOP (sub-agent runs this in its own context):\n \ + get_entities_for_lifting(scope=\"*\") -> analyze batch -> submit_lift_results -> repeat until DONE -> finalize_lifting\n\ + \nAfter the worker returns, call reload_rpg — some runtimes give sub-agents an isolated MCP session, in which case the caller's in-memory graph is stale until reloaded. Call lifting_status for per-state recommendations at any time.", + unlifted, meta.total_entities, batch_tokens.div_ceil(1000), + )); + } else { + result.push_str(&format!( + "\n\nNEXT STEP: {} entities unlifted (of {}). Lift now — don't wait for the user to ask; semantic search/fetch won't find unlifted entities by intent. Call get_entities_for_lifting(scope=\"*\"), analyze the batch, submit via submit_lift_results, repeat until DONE, then finalize_lifting.", + unlifted, meta.total_entities, + )); } Ok(result) } @@ -1693,9 +1713,14 @@ impl RpgServer { let batch = &pending[start..end]; + // Header kept free of the revision hash so the response prefix stays + // stable across graph updates — the LLM prompt cache can then retain + // the instructions + entity list even as the revision changes. The + // revision itself is emitted below the data, near the NEXT_ACTION + // block where the agent actually needs to read it. 
let mut result = format!( - "## ROUTING CANDIDATES (batch {} of {}, revision: {})\n\n", - batch_index, total_batches, revision, + "## ROUTING CANDIDATES (batch {} of {})\n\n", + batch_index, total_batches, ); // Include routing instructions on batch 0 From a4b84af2f4e83a2c9e942432866b0a2133472d2a Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 17:45:23 +0200 Subject: [PATCH 13/19] fix: deadlock cycle in build_semantic_hierarchy + module-count mismatch in build_rpg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues caught by Codex: Deadlock cycle (HIGH): build_semantic_hierarchy's sharded init path took hierarchy_session.write() first, then acquired graph.read() to compute clusters. update_rpg takes graph.write() first, then hierarchy_session.write(). Concurrent schedule: A holds session, waits on graph; B holds graph, waits on session. Real cycle, contradicted the lock-order invariant I had just declared on RpgServer. Fix: compute clusters under graph.read() first (no session held), then acquire hierarchy_session.write() and install — with a racing-initializer check so a second concurrent caller keeps the first caller's clusters instead of clobbering them. Module-count mismatch (MEDIUM): build_rpg's NEXT STEP drove its "unlifted of total" count and LARGE_SCOPE delegation threshold from meta.total_entities / meta.lifted_entities, both of which include Module entities. But lifting_coverage() and get_entities_for_lifting exclude modules (they get features via file-level synthesis, not direct lifting). On a codebase with many Module entities, build_rpg would report ~100 more "unlifted" than lifting_status, and could recommend sub-agent dispatch when lifting_status would still recommend foreground lifting for the same graph. Fix: capture lifting_coverage() before the graph moves into self.graph, use it for both the "lifted: X/Y" header line and the NEXT STEP count. Added a note "(excludes modules, ...)" so the number difference between the entities count and lifted/total is explicit. Verified with cargo fmt, cargo clippy --workspace --all-targets -- -D warnings, cargo test --workspace --lib, cargo test -p rpg-mcp. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 12 +++++++ crates/rpg-mcp/src/tools.rs | 71 +++++++++++++++++++++++++------------ 2 files changed, 60 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ca21f9..e69dbb3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/). "Tip: use get_entities_for_lifting" hint that agents routinely skipped. - Canonical lock-order invariant documented on the `RpgServer` struct so reviewers don't have to re-derive it from scattered call sites. +- `build_semantic_hierarchy` sharded init now takes `graph.read()` to + compute clusters *before* acquiring `hierarchy_session.write()`, + respecting the declared graph-before-session order. Previously the + sharded path acquired `hierarchy_session.write()` first and then + `graph.read()`, which formed a deadlock cycle with `update_rpg`'s + graph-then-session order under concurrent scheduling. - `lifting_status` tracks stale-feature drift across calls. A persistent per-server set records entities whose source was modified after they were lifted; the dashboard reports `stale_features: N entities modified @@ -122,6 +128,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/). 
to the post-auto-lift batch count in `get_entities_for_lifting`. - `rpg_info` error wording ("No RPG found") was miscited as a friendly status string; corrected to "any RPG tool returns 'No RPG found'". +- `build_rpg` NEXT STEP previously counted `Module` entities in its + "unlifted" total, while `lifting_status` and `get_entities_for_lifting` + exclude them. The two could disagree by hundreds of entities on large + codebases, tripping the delegation threshold in `build_rpg` when + `lifting_status` would still recommend foreground lifting. Both paths + now use `lifting_coverage()` (non-module) for the count. - `submit_lift_results` previously emitted `DONE` as soon as coverage reached 100%, which could terminate a stale-only re-lift loop after batch 1 while later batches were still queued. The NEXT/DONE branch diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index 8e444e7..1339010 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -700,6 +700,14 @@ impl RpgServer { storage::save(project_root, &graph).map_err(|e| format!("Failed to save RPG: {}", e))?; let _ = storage::ensure_gitignore(project_root); + // Capture lifting coverage BEFORE the graph moves into `self.graph`. + // `lifting_coverage()` excludes `Module` entities (they get features + // via file-level synthesis, not direct lifting), which matches the + // semantics of `get_entities_for_lifting`. `meta.total_entities` + // includes modules, so it would inflate the "unlifted" count and + // trip the delegation threshold too early. + let (lifted_non_module, total_non_module) = graph.lifting_coverage(); + // Update in-memory state let meta = graph.metadata.clone(); *self.graph.write().await = Some(graph); @@ -743,6 +751,11 @@ impl RpgServer { } else { "structural" }; + // `lifted: X/Y` uses non-module counts (matches `lifting_status` and + // `get_entities_for_lifting`) so the numbers agents see here line + // up with the numbers they see elsewhere. The `entities: N` line + // above is the raw total including modules — those get features + // via file-level synthesis, not direct lifting. let mut result = format!( "RPG built successfully.\n\ languages: {}\n\ @@ -751,7 +764,7 @@ impl RpgServer { functional_areas: {}\n\ dependency_edges: {}\n\ containment_edges: {}\n\ - lifted: {}/{}\n\ + lifted: {}/{} (excludes modules, which are aggregated from files)\n\ hierarchy: {}", lang_display, meta.total_entities, @@ -759,8 +772,8 @@ impl RpgServer { meta.functional_areas, meta.dependency_edges, meta.containment_edges, - meta.lifted_entities, - meta.total_entities, + lifted_non_module, + total_non_module, hierarchy_label, ); @@ -797,7 +810,7 @@ impl RpgServer { // tools (search_node, context_pack, plan_change) are lossy on an // unlifted graph — users notice this as "search doesn't find the // thing I know is there" — so the default is "lift now". - let unlifted = meta.total_entities.saturating_sub(meta.lifted_entities); + let unlifted = total_non_module.saturating_sub(lifted_non_module); if unlifted == 0 { result.push_str( "\n\nNEXT STEP: Graph is fully lifted. 
Semantic tools (search_node, context_pack, plan_change, explore_rpg) are ready — prefer them over grep/cat/find for any structural question.", @@ -809,12 +822,12 @@ impl RpgServer { \nLOOP (sub-agent runs this in its own context):\n \ get_entities_for_lifting(scope=\"*\") -> analyze batch -> submit_lift_results -> repeat until DONE -> finalize_lifting\n\ \nAfter the worker returns, call reload_rpg — some runtimes give sub-agents an isolated MCP session, in which case the caller's in-memory graph is stale until reloaded. Call lifting_status for per-state recommendations at any time.", - unlifted, meta.total_entities, batch_tokens.div_ceil(1000), + unlifted, total_non_module, batch_tokens.div_ceil(1000), )); } else { result.push_str(&format!( "\n\nNEXT STEP: {} entities unlifted (of {}). Lift now — don't wait for the user to ask; semantic search/fetch won't find unlifted entities by intent. Call get_entities_for_lifting(scope=\"*\"), analyze the batch, submit via submit_lift_results, repeat until DONE, then finalize_lifting.", - unlifted, meta.total_entities, + unlifted, total_non_module, )); } Ok(result) @@ -2645,30 +2658,42 @@ impl RpgServer { // Handle sharded workflow if needs_sharding { - let mut session_guard = self.hierarchy_session.write().await; - - // Initialize session if it doesn't exist - if session_guard.is_none() { - let graph_guard = self.graph.read().await; - let graph = graph_guard.as_ref().unwrap(); - - let clusters = rpg_encoder::hierarchy::cluster_files_for_hierarchy(graph, 70); - let total_clusters = clusters.len(); - - *session_guard = Some(HierarchySession { - clusters, - functional_areas: None, - assignments: std::collections::HashMap::new(), - batches_completed: 0, - }); + // Lock order invariant (see RpgServer doc): graph before + // hierarchy_session. Peek the session first WITHOUT holding + // graph, compute clusters under graph if we need to + // initialize, then take hierarchy_session.write() and install + // — with a re-check against racing initializers. + let already_initialized = self.hierarchy_session.read().await.is_some(); + + if !already_initialized { + // Compute clusters under graph.read() (no session held) + let new_clusters = { + let graph_guard = self.graph.read().await; + let graph = graph_guard.as_ref().unwrap(); + rpg_encoder::hierarchy::cluster_files_for_hierarchy(graph, 70) + }; - drop(graph_guard); + // Now install — if another caller raced us, keep theirs. 
+ let mut session_guard = self.hierarchy_session.write().await; + let total_clusters = if session_guard.is_none() { + let count = new_clusters.len(); + *session_guard = Some(HierarchySession { + clusters: new_clusters, + functional_areas: None, + assignments: std::collections::HashMap::new(), + batches_completed: 0, + }); + count + } else { + session_guard.as_ref().unwrap().clusters.len() + }; drop(session_guard); // Return batch 0 (domain discovery) return self.build_batch_0_domain_discovery(total_clusters).await; } + let mut session_guard = self.hierarchy_session.write().await; // Session exists - continue with next batch let session = session_guard.as_mut().unwrap(); let total_batches = session.clusters.len() + 1; // +1 for domain discovery From d0fb97eae0be4326f6400d14154b93e65513b4ad Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 17:52:12 +0200 Subject: [PATCH 14/19] fix: close TOCTOU in build_semantic_hierarchy install + rename "lifted" header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TOCTOU: build_semantic_hierarchy's sharded init computed clusters under graph.read(), dropped that lock, then acquired hierarchy_session.write(). In the gap, build_rpg/update_rpg could swap the graph and clear the session; this path would then install clusters derived from the pre-update graph. Fix: hold graph.read() through the hierarchy_session install. Graph-before-session matches the declared lock order, so no new cycle. Header rename: "lifted: X/Y (excludes modules...)" reads as though "X/Y" undercounts, since "entities: N" above uses the module-inclusive total. Renamed to "liftable_entities: X/Y (modules are aggregated from files, not lifted directly)" per Codex's suggestion — keeps the non-module semantics explicit and removes the mental diff against the entities line. Verified with cargo fmt, cargo clippy -p rpg-mcp --all-targets -- -D warnings, cargo test -p rpg-mcp. Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/rpg-mcp/src/tools.rs | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index 1339010..debf9ba 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -764,7 +764,7 @@ impl RpgServer { functional_areas: {}\n\ dependency_edges: {}\n\ containment_edges: {}\n\ - lifted: {}/{} (excludes modules, which are aggregated from files)\n\ + liftable_entities: {}/{} (modules are aggregated from files, not lifted directly)\n\ hierarchy: {}", lang_display, meta.total_entities, @@ -2660,20 +2660,22 @@ impl RpgServer { if needs_sharding { // Lock order invariant (see RpgServer doc): graph before // hierarchy_session. Peek the session first WITHOUT holding - // graph, compute clusters under graph if we need to - // initialize, then take hierarchy_session.write() and install - // — with a re-check against racing initializers. + // graph; if we need to initialize, take graph.read() FIRST + // and hold it through the session.write() install so that a + // concurrent build_rpg/update_rpg that clears the session + // can't slip in between us computing clusters and installing + // them (that would persist clusters derived from the + // pre-update graph). 
let already_initialized = self.hierarchy_session.read().await.is_some(); if !already_initialized { - // Compute clusters under graph.read() (no session held) - let new_clusters = { - let graph_guard = self.graph.read().await; - let graph = graph_guard.as_ref().unwrap(); - rpg_encoder::hierarchy::cluster_files_for_hierarchy(graph, 70) - }; + let graph_guard = self.graph.read().await; + let graph = graph_guard.as_ref().unwrap(); + let new_clusters = rpg_encoder::hierarchy::cluster_files_for_hierarchy(graph, 70); - // Now install — if another caller raced us, keep theirs. + // Acquire hierarchy_session under graph to close the TOCTOU + // window. If another caller raced us and already installed + // a session, keep theirs and drop our speculative clusters. let mut session_guard = self.hierarchy_session.write().await; let total_clusters = if session_guard.is_none() { let count = new_clusters.len(); @@ -2688,6 +2690,7 @@ impl RpgServer { session_guard.as_ref().unwrap().clusters.len() }; drop(session_guard); + drop(graph_guard); // Return batch 0 (domain discovery) return self.build_batch_0_domain_discovery(total_clusters).await; From c51a1a6d7a9dabccdf4a20d53cc188fa08fdfa39 Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 17:58:42 +0200 Subject: [PATCH 15/19] fix: session-clear TOCTOU in batch-0 + project-switch config contamination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups from Codex's round 8: Session-clear race (MEDIUM): build_batch_0_domain_discovery re-read self.hierarchy_session after the caller had already decided to emit batch 0. In the gap between install/drop and the re-read, a concurrent build_rpg or update_rpg can clear the session, and the re-read panics on unwrap(). Refactored the helper to take clusters as a parameter — callers snapshot the cluster list while holding the session lock and hand it off, so the helper never re-reads session state it doesn't own. Project-switch config contamination (LOW): set_project_root was calling reload_config_with_warning, whose "keep previous config on parse failure" semantics are correct for same-project reload but wrong for project switch. A project whose .rpg/config.toml was malformed would silently inherit the previous project's encoding/batch settings. Added a dedicated project-switch load path that falls back to RpgConfig::default() on parse failure instead of preserving the old project's config. Verified with cargo fmt, cargo clippy -p rpg-mcp --all-targets -- -D warnings, cargo test -p rpg-mcp. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 13 +++++ crates/rpg-mcp/src/hierarchy_helpers.rs | 16 ++++-- crates/rpg-mcp/src/tools.rs | 71 +++++++++++++++++-------- 3 files changed, 73 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e69dbb3..d1bb27d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/). sharded path acquired `hierarchy_session.write()` first and then `graph.read()`, which formed a deadlock cycle with `update_rpg`'s graph-then-session order under concurrent scheduling. +- `build_batch_0_domain_discovery` now takes clusters as a parameter + instead of re-reading `self.hierarchy_session`. The previous design + left a TOCTOU window where a concurrent `build_rpg`/`update_rpg` + could clear the session between installation and domain-discovery + rendering; the re-read would then panic on `unwrap()`. 
Callers + snapshot the clusters while holding the session lock and pass them + through. +- `set_project_root` no longer preserves the previous project's + in-memory config when the new project has a malformed + `.rpg/config.toml`. It now falls back to `RpgConfig::default()` on + parse failure — project switch must not silently cross-contaminate + encoding/batch settings. (Same-project `reload_rpg` keeps the + previous config on parse failure, which is correct for that flow.) - `lifting_status` tracks stale-feature drift across calls. A persistent per-server set records entities whose source was modified after they were lifted; the dashboard reports `stale_features: N entities modified diff --git a/crates/rpg-mcp/src/hierarchy_helpers.rs b/crates/rpg-mcp/src/hierarchy_helpers.rs index 4fc775b..f5e64cd 100644 --- a/crates/rpg-mcp/src/hierarchy_helpers.rs +++ b/crates/rpg-mcp/src/hierarchy_helpers.rs @@ -4,15 +4,21 @@ use crate::server::RpgServer; use rpg_core::graph::{EntityKind, normalize_path}; impl RpgServer { - /// Build batch 0: Domain discovery from representative files across all clusters + /// Build batch 0: Domain discovery from representative files across all clusters. + /// + /// Takes `clusters` as a direct parameter rather than re-reading + /// `self.hierarchy_session` inside this function. The caller's already + /// held the session lock when it decided to emit batch 0, so passing + /// the data through avoids a TOCTOU window where a concurrent + /// `build_rpg`/`update_rpg` could clear the session between the + /// caller's decision and our re-read. pub(crate) async fn build_batch_0_domain_discovery( &self, - total_clusters: usize, + clusters: &[rpg_encoder::hierarchy::FileCluster], ) -> Result { + let total_clusters = clusters.len(); let guard = self.graph.read().await; let graph = guard.as_ref().unwrap(); - let session_guard = self.hierarchy_session.read().await; - let session = session_guard.as_ref().unwrap(); let root = self.project_root().await; let repo_name = root @@ -22,7 +28,7 @@ impl RpgServer { // Collect representative files from all clusters let mut representative_features = String::new(); - for cluster in &session.clusters { + for cluster in clusters { for file in &cluster.representatives { // Find Module entity for this file for entity in graph.entities.values() { diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index debf9ba..547c388 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -445,11 +445,27 @@ impl RpgServer { // Swap the root *self.project_root_cell.write().await = canonical.clone(); - // Reload the config from the new project — encoding.max_batch_tokens - // and other project-scoped settings must follow the root or tools - // will operate with the previous project's configuration. Logs a - // warning if the new project has a malformed .rpg/config.toml. - Self::reload_config_with_warning(&self.config, &canonical).await; + // Load the NEW project's config. Unlike `reload_rpg` (same-project + // reload, where keeping the previous in-memory config on parse + // error is the right call because the old config was project- + // valid), a project switch must *not* inherit the old project's + // config — that would silently cross-contaminate encoding/batch + // settings across unrelated codebases. So on parse failure we + // fall back to `RpgConfig::default()` and emit a warning, and on + // "file absent" we likewise use defaults. 
+ { + let new_config = match rpg_core::config::RpgConfig::load(&canonical) { + Ok(cfg) => cfg, + Err(e) => { + eprintln!( + "rpg: failed to parse .rpg/config.toml in new project ({}); falling back to defaults for this project", + e + ); + rpg_core::config::RpgConfig::default() + } + }; + *self.config.write().await = new_config; + } // Reset all session + sync state — everything is project-scoped *self.lifting_session.write().await = None; @@ -2676,36 +2692,47 @@ impl RpgServer { // Acquire hierarchy_session under graph to close the TOCTOU // window. If another caller raced us and already installed // a session, keep theirs and drop our speculative clusters. + // Snapshot the winning clusters BEFORE dropping the session + // lock so `build_batch_0_domain_discovery` doesn't have to + // re-read `self.hierarchy_session` — that second read was + // the session-clear race Codex called out. let mut session_guard = self.hierarchy_session.write().await; - let total_clusters = if session_guard.is_none() { - let count = new_clusters.len(); - *session_guard = Some(HierarchySession { - clusters: new_clusters, - functional_areas: None, - assignments: std::collections::HashMap::new(), - batches_completed: 0, - }); - count - } else { - session_guard.as_ref().unwrap().clusters.len() - }; + let clusters_snapshot: Vec = + if session_guard.is_none() { + let snapshot = new_clusters.clone(); + *session_guard = Some(HierarchySession { + clusters: new_clusters, + functional_areas: None, + assignments: std::collections::HashMap::new(), + batches_completed: 0, + }); + snapshot + } else { + session_guard.as_ref().unwrap().clusters.clone() + }; drop(session_guard); drop(graph_guard); - // Return batch 0 (domain discovery) - return self.build_batch_0_domain_discovery(total_clusters).await; + return self + .build_batch_0_domain_discovery(&clusters_snapshot) + .await; } let mut session_guard = self.hierarchy_session.write().await; // Session exists - continue with next batch let session = session_guard.as_mut().unwrap(); let total_batches = session.clusters.len() + 1; // +1 for domain discovery - let clusters_len = session.clusters.len(); if session.batches_completed == 0 { - // Still on batch 0 - waiting for functional areas + // Still on batch 0 — snapshot clusters under the session + // lock and hand them off so build_batch_0 doesn't re-read + // self.hierarchy_session (same race as the init path). + let clusters_snapshot: Vec = + session.clusters.clone(); drop(session_guard); - return self.build_batch_0_domain_discovery(clusters_len).await; + return self + .build_batch_0_domain_discovery(&clusters_snapshot) + .await; } if session.batches_completed > session.clusters.len() { From 26a271593007561c9a34a68819ca9a6e2c7ac82d Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 18:04:50 +0200 Subject: [PATCH 16/19] fix: collapse build_semantic_hierarchy init into single write-lock acquisition Final session-clear race closure. The previous fix sampled hierarchy_session with a read lock, branched on "is it initialized?", and then trusted that answer after a later write-lock acquisition. Between the two, a concurrent build_rpg/update_rpg/reload_rpg/ set_project_root could clear the session, and the subsequent session_guard.as_mut().unwrap() on the "already initialized" branch would panic. Fix: collapse both branches into a single session.write() acquisition under graph.read() (still graph-before-session per the declared invariant). 
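A minimal sketch of that single-acquisition shape, using hypothetical stand-in
types rather than the real `HierarchySession`/`FileCluster`:

```rust
use tokio::sync::RwLock;

#[derive(Clone)]
struct Cluster;
struct Session { clusters: Vec<Cluster>, batches_completed: usize }

enum Action {
    EmitBatch0(Vec<Cluster>),
    EmitBatchN { idx: usize, cluster: Cluster },
    AllDone,
}

async fn next_action(session: &RwLock<Option<Session>>, fresh: Vec<Cluster>) -> Action {
    let mut guard = session.write().await;
    // Initialize if absent — fresh, or cleared out from under us.
    let s = guard.get_or_insert_with(|| Session { clusters: fresh, batches_completed: 0 });
    let (done, total) = (s.batches_completed, s.clusters.len());
    let action = if done == 0 {
        Action::EmitBatch0(s.clusters.clone())
    } else if done > total {
        Action::AllDone
    } else {
        Action::EmitBatchN { idx: done - 1, cluster: s.clusters[done - 1].clone() }
    };
    if matches!(action, Action::AllDone) {
        *guard = None;
    }
    action
    // The guard drops here; the caller renders from the owned Action,
    // so nothing re-reads the session after the decision.
}
```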
Decide init-vs-continue while holding the write lock; pack the work into an Action enum; drop the locks; then render. The session cannot change between the decision and the render because the decision-time snapshot carries everything the render needs. Verified with cargo fmt, cargo clippy -p rpg-mcp --all-targets -- -D warnings, cargo test -p rpg-mcp. Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/rpg-mcp/src/tools.rs | 157 +++++++++++++++++++----------------- 1 file changed, 84 insertions(+), 73 deletions(-) diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index 547c388..40e606f 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -2675,85 +2675,96 @@ impl RpgServer { // Handle sharded workflow if needs_sharding { // Lock order invariant (see RpgServer doc): graph before - // hierarchy_session. Peek the session first WITHOUT holding - // graph; if we need to initialize, take graph.read() FIRST - // and hold it through the session.write() install so that a - // concurrent build_rpg/update_rpg that clears the session - // can't slip in between us computing clusters and installing - // them (that would persist clusters derived from the - // pre-update graph). - let already_initialized = self.hierarchy_session.read().await.is_some(); - - if !already_initialized { - let graph_guard = self.graph.read().await; - let graph = graph_guard.as_ref().unwrap(); - let new_clusters = rpg_encoder::hierarchy::cluster_files_for_hierarchy(graph, 70); - - // Acquire hierarchy_session under graph to close the TOCTOU - // window. If another caller raced us and already installed - // a session, keep theirs and drop our speculative clusters. - // Snapshot the winning clusters BEFORE dropping the session - // lock so `build_batch_0_domain_discovery` doesn't have to - // re-read `self.hierarchy_session` — that second read was - // the session-clear race Codex called out. - let mut session_guard = self.hierarchy_session.write().await; - let clusters_snapshot: Vec = - if session_guard.is_none() { - let snapshot = new_clusters.clone(); - *session_guard = Some(HierarchySession { - clusters: new_clusters, - functional_areas: None, - assignments: std::collections::HashMap::new(), - batches_completed: 0, - }); - snapshot - } else { - session_guard.as_ref().unwrap().clusters.clone() - }; - drop(session_guard); - drop(graph_guard); - - return self - .build_batch_0_domain_discovery(&clusters_snapshot) - .await; + // hierarchy_session. A concurrent build_rpg/update_rpg/ + // reload_rpg/set_project_root can clear the session at any + // moment before we hold its write lock, so decide whether to + // initialize only while holding the write lock — never by + // re-trusting an earlier peek. We take graph.read() FIRST + // (ordering) so that if we need to initialize, we can compute + // clusters from a stable graph and install under the + // session.write() that's about to follow. 
+ enum Action { + EmitBatch0(Vec), + EmitBatchN { + batch_idx: usize, + cluster: rpg_encoder::hierarchy::FileCluster, + functional_areas: Vec, + total_batches: usize, + }, + AllDone { + total_batches: usize, + }, } - let mut session_guard = self.hierarchy_session.write().await; - // Session exists - continue with next batch - let session = session_guard.as_mut().unwrap(); - let total_batches = session.clusters.len() + 1; // +1 for domain discovery - - if session.batches_completed == 0 { - // Still on batch 0 — snapshot clusters under the session - // lock and hand them off so build_batch_0 doesn't re-read - // self.hierarchy_session (same race as the init path). - let clusters_snapshot: Vec = - session.clusters.clone(); - drop(session_guard); - return self - .build_batch_0_domain_discovery(&clusters_snapshot) - .await; - } + let graph_guard = self.graph.read().await; + let graph = graph_guard.as_ref().unwrap(); - if session.batches_completed > session.clusters.len() { - // All batches complete - *session_guard = None; - return Ok(format!( - "All {} batches complete. Hierarchy has been applied.", - total_batches - )); - } + let action = { + let mut session_guard = self.hierarchy_session.write().await; - // Return next file assignment batch - let batch_idx = session.batches_completed - 1; // -1 because batch 0 is domain discovery - let cluster = session.clusters[batch_idx].clone(); - let functional_areas = session.functional_areas.clone().unwrap_or_default(); + // Initialize if absent — fresh or cleared-out-from-under-us. + if session_guard.is_none() { + let new_clusters = + rpg_encoder::hierarchy::cluster_files_for_hierarchy(graph, 70); + let snapshot = new_clusters.clone(); + *session_guard = Some(HierarchySession { + clusters: new_clusters, + functional_areas: None, + assignments: std::collections::HashMap::new(), + batches_completed: 0, + }); + Action::EmitBatch0(snapshot) + } else { + let session = session_guard.as_mut().unwrap(); + let total_batches = session.clusters.len() + 1; + + if session.batches_completed == 0 { + Action::EmitBatch0(session.clusters.clone()) + } else if session.batches_completed > session.clusters.len() { + *session_guard = None; + Action::AllDone { total_batches } + } else { + let batch_idx = session.batches_completed - 1; + Action::EmitBatchN { + batch_idx, + cluster: session.clusters[batch_idx].clone(), + functional_areas: session.functional_areas.clone().unwrap_or_default(), + total_batches, + } + } + } + }; - drop(session_guard); + // Release graph lock before the potentially-expensive batch + // rendering — we've snapshotted everything we need. + drop(graph_guard); - return self - .build_cluster_batch(batch_idx + 1, total_batches, &cluster, &functional_areas) - .await; + match action { + Action::EmitBatch0(clusters) => { + return self.build_batch_0_domain_discovery(&clusters).await; + } + Action::AllDone { total_batches } => { + return Ok(format!( + "All {} batches complete. 
Hierarchy has been applied.", + total_batches + )); + } + Action::EmitBatchN { + batch_idx, + cluster, + functional_areas, + total_batches, + } => { + return self + .build_cluster_batch( + batch_idx + 1, + total_batches, + &cluster, + &functional_areas, + ) + .await; + } + } } // Non-sharded workflow (≤100 files) - original single-shot behavior From 86d455f68bcf5e34964a0df47018fd2fde20533b Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 18:10:44 +0200 Subject: [PATCH 17/19] fix: pass graph through hierarchy helpers + prune stale on reload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two more findings from Codex: Graph-replace race in hierarchy helpers (HIGH): build_batch_0_domain_discovery and build_cluster_batch dropped graph_guard before rendering and then re-read self.graph, unwrapping the option. A concurrent set_project_root to a root with no graph could panic; any graph replacement could render against the wrong snapshot. Refactored both helpers to take `&RPGraph` as a parameter; the caller keeps graph_guard alive across the render so the helper's reference stays valid and there's no second read. reload_rpg wiped stale-feature drift (MEDIUM): the documented CLI / isolated-subagent flow only re-lifts entities with no features — stale entities (features present but outdated) survive. Clearing stale_entity_ids on every successful reload erased that backlog and let lifting_status falsely report 100% coverage. Now we prune by entity existence in the newly-loaded graph instead of wholesale clearing. Verified with cargo fmt, cargo clippy -p rpg-mcp --all-targets -- -D warnings, cargo test -p rpg-mcp. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 23 ++++++++++++------ crates/rpg-mcp/src/hierarchy_helpers.rs | 32 +++++++++++++++---------- crates/rpg-mcp/src/tools.rs | 30 ++++++++++++++--------- 3 files changed, 54 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1bb27d..002d09d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,13 +21,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/). sharded path acquired `hierarchy_session.write()` first and then `graph.read()`, which formed a deadlock cycle with `update_rpg`'s graph-then-session order under concurrent scheduling. -- `build_batch_0_domain_discovery` now takes clusters as a parameter - instead of re-reading `self.hierarchy_session`. The previous design - left a TOCTOU window where a concurrent `build_rpg`/`update_rpg` - could clear the session between installation and domain-discovery - rendering; the re-read would then panic on `unwrap()`. Callers - snapshot the clusters while holding the session lock and pass them - through. +- `build_batch_0_domain_discovery` and `build_cluster_batch` now take + `&RPGraph` and (where applicable) clusters as parameters instead of + re-reading `self.graph` / `self.hierarchy_session`. The previous + design left two TOCTOU windows: a session-clear race where a + concurrent `build_rpg`/`update_rpg` could panic the helper on + `session_guard.as_mut().unwrap()`, and a graph-replace race where a + concurrent `set_project_root` could panic on `graph.as_ref().unwrap()`. + Callers snapshot everything they need under the appropriate locks and + pass references through; the helpers no longer touch server state + beyond the project root. +- `reload_rpg` now prunes the stale-tracking set against the newly + loaded graph instead of clearing it wholesale. 
The CLI / isolated + sub-agent re-lift flow only refreshes entities with no features — + stale entities (features present but outdated) survive it, so + clearing the set would let `lifting_status` report 100% coverage + while re-lift work remained. - `set_project_root` no longer preserves the previous project's in-memory config when the new project has a malformed `.rpg/config.toml`. It now falls back to `RpgConfig::default()` on diff --git a/crates/rpg-mcp/src/hierarchy_helpers.rs b/crates/rpg-mcp/src/hierarchy_helpers.rs index f5e64cd..5f0ee31 100644 --- a/crates/rpg-mcp/src/hierarchy_helpers.rs +++ b/crates/rpg-mcp/src/hierarchy_helpers.rs @@ -1,24 +1,26 @@ //! Helper functions for sharded hierarchy construction workflow. use crate::server::RpgServer; -use rpg_core::graph::{EntityKind, normalize_path}; +use rpg_core::graph::{EntityKind, RPGraph, normalize_path}; impl RpgServer { /// Build batch 0: Domain discovery from representative files across all clusters. /// - /// Takes `clusters` as a direct parameter rather than re-reading - /// `self.hierarchy_session` inside this function. The caller's already - /// held the session lock when it decided to emit batch 0, so passing - /// the data through avoids a TOCTOU window where a concurrent - /// `build_rpg`/`update_rpg` could clear the session between the - /// caller's decision and our re-read. + /// Takes `graph` and `clusters` as parameters rather than re-reading + /// `self.graph` / `self.hierarchy_session` inside this function. The + /// caller holds the relevant locks at decision time and passes + /// snapshots through; that closes two races: + /// 1. session-clear: a concurrent build_rpg/update_rpg could clear + /// hierarchy_session between the caller's decision and our re-read. + /// 2. graph-replace: a concurrent set_project_root could swap + /// self.graph to `None` between the caller's decision and our + /// re-read, panicking on `unwrap()`. pub(crate) async fn build_batch_0_domain_discovery( &self, + graph: &RPGraph, clusters: &[rpg_encoder::hierarchy::FileCluster], ) -> Result { let total_clusters = clusters.len(); - let guard = self.graph.read().await; - let graph = guard.as_ref().unwrap(); let root = self.project_root().await; let repo_name = root @@ -85,17 +87,21 @@ impl RpgServer { Ok(output) } - /// Build file assignment batch for a specific cluster + /// Build file assignment batch for a specific cluster. + /// + /// Takes `graph` as a parameter for the same reasons as + /// `build_batch_0_domain_discovery` — the caller holds graph.read() + /// at decision time and passes the reference through so a concurrent + /// `set_project_root` can't swap `self.graph` to `None` and panic us + /// during rendering. pub(crate) async fn build_cluster_batch( &self, + graph: &RPGraph, batch_num: usize, total_batches: usize, cluster: &rpg_encoder::hierarchy::FileCluster, functional_areas: &[String], ) -> Result { - let guard = self.graph.read().await; - let graph = guard.as_ref().unwrap(); - let root = self.project_root().await; let repo_name = root .file_name() diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index 40e606f..b77b834 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -2208,12 +2208,19 @@ impl RpgServer { match storage::load(&project_root) { Ok(g) => { let entities = g.metadata.total_entities; - // Reset drift tracking only on success — the on-disk graph - // might already reflect submissions from an external CLI - // run, so the in-memory stale set no longer applies. 
On - // failure we keep the previous graph *and* its drift backlog - // so a transient read error doesn't silently erase state. - *self.stale_entity_ids.write().await = std::collections::HashSet::new(); + // Prune (don't clear) the drift-tracking set against the + // newly-loaded graph. Wholesale clearing was wrong because + // the documented CLI / isolated-subagent flow only re-lifts + // entities with no features — stale entities (features + // present but outdated) survive that flow, so clearing + // would let lifting_status report "100% coverage" even + // though re-lift work remains. Pruning by entity-existence + // keeps that backlog visible while dropping IDs that were + // removed in the new graph. + { + let mut stale = self.stale_entity_ids.write().await; + stale.retain(|id| g.entities.contains_key(id)); + } *self.graph.write().await = Some(g); // Sync embedding index incrementally #[cfg(feature = "embeddings")] @@ -2735,13 +2742,13 @@ impl RpgServer { } }; - // Release graph lock before the potentially-expensive batch - // rendering — we've snapshotted everything we need. - drop(graph_guard); - + // Keep `graph_guard` held across rendering. Both helpers now + // take `&RPGraph` so they don't re-read `self.graph`, which + // would otherwise expose us to a concurrent `set_project_root` + // that could swap the graph to `None` mid-render. match action { Action::EmitBatch0(clusters) => { - return self.build_batch_0_domain_discovery(&clusters).await; + return self.build_batch_0_domain_discovery(graph, &clusters).await; } Action::AllDone { total_batches } => { return Ok(format!( @@ -2757,6 +2764,7 @@ impl RpgServer { } => { return self .build_cluster_batch( + graph, batch_idx + 1, total_batches, &cluster, From 4d76e87dce273f45bb1195ea80c35e75c64286dd Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 20:19:42 +0200 Subject: [PATCH 18/19] fix: independent-audit findings (startup stale loss + auto_lift drain + 4 polish) Two MEDIUM bugs found by an independent reviewer that survived all prior review rounds: Startup stale-tracking loss (MEDIUM, main.rs): the startup auto-update discarded summary.modified_entity_ids. Modifications between the last lift and a session restart silently dropped off lifting_status, even though the same modifications would have been tracked correctly if they happened mid-session via auto_sync_if_stale or update_rpg. Now the startup path feeds the IDs into stale_entity_ids and prunes against the current graph. Also seeds last_auto_sync_changeset with the real empty- workdir hash so the first tool call short-circuits cleanly. auto_lift stale leak (MEDIUM, tools.rs): when invoked with a non-`*` scope, the lift pipeline freshens features for every in-scope entity including ones that were previously stale. The default `*` scope is safe because resolve_scope filters to feature-empty entities, but explicit scopes bypass that filter. Without a drain, lifting_status would keep reporting those entities as stale forever. Now we snapshot features before the pipeline, diff after, and drain stale entries for any ID whose features changed. Plus four LOW polish items from the same review: - set_project_root tool description was Claude-Code-specific in its example. Reads runtime-neutral now. - get_entities_for_lifting batch-0 NOTE referenced "batches 2..N", off-by-one against the 0-based batch_index parameter it accepts. Reads "do not request further batches in this context". 
- auto_sync_if_stale reordered inner writes to match the declared lock rank (stale=3 before auto-sync markers=5). Functionally equivalent (each write is statement-per-lock) but matches the invariant doc as a clean exemplar. - build_rpg now prunes stale_entity_ids against the new graph so dead IDs don't accumulate across rebuilds. Verified with cargo fmt, cargo clippy -p rpg-mcp --all-targets -- -D warnings, cargo test -p rpg-mcp. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 24 ++++++++++++++++ crates/rpg-mcp/src/main.rs | 27 ++++++++++++++++-- crates/rpg-mcp/src/server.rs | 14 +++++++--- crates/rpg-mcp/src/tools.rs | 53 ++++++++++++++++++++++++++++++++++-- 4 files changed, 110 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 002d09d..02bc60c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/). stale entities (features present but outdated) survive it, so clearing the set would let `lifting_status` report 100% coverage while re-lift work remained. +- Server startup auto-update now feeds `summary.modified_entity_ids` + into the stale-tracking set. Previously the result was discarded: + modifications between the last lift and a session restart silently + dropped off the dashboard, even though the same modifications would + have been tracked correctly if they happened mid-session via + `auto_sync_if_stale` or `update_rpg`. +- `auto_lift` now drains stale entries for the entities its pipeline + re-lifted. The non-`*` scope path freshens features for every + in-scope entity (including ones that were stale), so stale entries + for those IDs become invalid after the call. Without the drain, + `lifting_status` would keep counting them. +- `build_rpg` now prunes the stale-tracking set against the newly built + graph so dead entity IDs don't accumulate across rebuilds. +- `set_project_root` tool description is no longer Claude-Code-specific + in its example. Reads as `MCP server launched in your home directory + but you want to work on ~/myproject` — applies to every runtime. +- `get_entities_for_lifting` batch-0 dispatch NOTE no longer references + "batches 2..N" (off-by-one against the 0-based `batch_index` parameter + it actually takes). Reads as "do not request further batches in this + context". +- `auto_sync_if_stale` reorders its inner writes to follow the canonical + lock rank (stale before auto-sync markers). Functionally equivalent + because each write is statement-per-lock, but matches the declared + invariant so the file reads as a clean exemplar. - `set_project_root` no longer preserves the previous project's in-memory config when the new project has a malformed `.rpg/config.toml`. It now falls back to `RpgConfig::default()` on diff --git a/crates/rpg-mcp/src/main.rs b/crates/rpg-mcp/src/main.rs index cf98eff..abd1a7b 100644 --- a/crates/rpg-mcp/src/main.rs +++ b/crates/rpg-mcp/src/main.rs @@ -93,6 +93,21 @@ async fn main() -> Result<()> { Ok(s) => { graph.metadata.paradigms = paradigm_names; let _ = storage::save(&server.project_root().await, graph); + // Persist stale entity IDs from the startup sync so + // lifting_status sees them on the first query. Every + // other path that produces a summary feeds + // `modified_entity_ids` into `stale_entity_ids` + // (`auto_sync_if_stale`, `update_rpg`). 
The startup + // path is the one exception — without this, modified + // entities from between the last lift and this startup + // are silently dropped across the session boundary. + { + let mut stale = server.stale_entity_ids.write().await; + for id in &s.modified_entity_ids { + stale.insert(id.clone()); + } + stale.retain(|id| graph.entities.contains_key(id)); + } eprintln!( " Auto-update complete: +{} -{} ~{}", s.entities_added, s.entities_removed, s.entities_modified @@ -103,9 +118,17 @@ async fn main() -> Result<()> { } else { eprintln!(" Graph is up to date."); } - // Seed auto-sync HEAD so the first query doesn't redundantly re-sync + // Seed auto-sync markers for a clean post-startup workdir so + // the first query short-circuits at server.rs's (HEAD, + // changeset) match instead of redundantly re-running the + // workdir diff. Must use the real empty-workdir changeset + // hash (not an empty string) for the match to fire. + let project_root = server.project_root().await; *server.last_auto_sync_head.write().await = - rpg_encoder::evolution::get_head_sha(&server.project_root().await).ok(); + rpg_encoder::evolution::get_head_sha(&project_root).ok(); + *server.last_auto_sync_changeset.write().await = + Some(RpgServer::compute_changeset_hash(&[], &project_root)); + *server.last_auto_sync_workdir_paths.write().await = std::collections::HashSet::new(); } } diff --git a/crates/rpg-mcp/src/server.rs b/crates/rpg-mcp/src/server.rs index c2cf4dc..fac65c3 100644 --- a/crates/rpg-mcp/src/server.rs +++ b/crates/rpg-mcp/src/server.rs @@ -322,15 +322,18 @@ impl RpgServer { Ok(summary) => { graph.metadata.paradigms = paradigm_names; let _ = storage::save(&project_root, graph); - *self.last_auto_sync_head.write().await = Some(current_head); - *self.last_auto_sync_changeset.write().await = Some(current_changeset); - *self.last_auto_sync_workdir_paths.write().await = current_paths; // Persist stale entity IDs so lifting_status can surface // stale-feature drift in subsequent calls. These entities // still count as "lifted" by coverage(), so without this // set, lifting_status would report "100% coverage" while // search_node returns outdated features. + // + // Ordered before the last_auto_sync_* writes to follow the + // canonical lock rank declared on RpgServer (stale=3 before + // auto-sync markers=5). Each statement releases its write + // lock before the next is acquired, so order would be moot, + // but matching rank keeps the file readable as an exemplar. { let mut stale = self.stale_entity_ids.write().await; for id in &summary.modified_entity_ids { @@ -339,6 +342,9 @@ impl RpgServer { // Prune entries for entities that no longer exist stale.retain(|id| graph.entities.contains_key(id)); } + *self.last_auto_sync_head.write().await = Some(current_head); + *self.last_auto_sync_changeset.write().await = Some(current_changeset); + *self.last_auto_sync_workdir_paths.write().await = current_paths; if summary.entities_added == 0 && summary.entities_modified == 0 @@ -428,7 +434,7 @@ impl RpgServer { /// added/modified/renamed file. Deleted files hash their path only. /// Same changeset + same stat = same hash = no re-sync. Second save of the /// same file changes mtime → different hash → re-sync fires. 
- fn compute_changeset_hash( + pub(crate) fn compute_changeset_hash( changes: &[rpg_encoder::evolution::FileChange], project_root: &std::path::Path, ) -> String { diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index b77b834..edf7b86 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -407,7 +407,7 @@ impl RpgServer { } #[tool( - description = "Switch the active project root for this session. Use this when you started the session from one directory but want RPG to operate on a different project — for example, launched Claude Code from your home directory but want to work on ~/myproject. The server loads the graph from the new directory's .rpg/graph.json if present, resets all session state (lifting sessions, auto-sync markers, pending routing), and points every subsequent tool call at the new root. Path is tilde-expanded and canonicalized.", + description = "Switch the active project root for this session. Use this when the server was started from one directory but you want RPG to operate on a different project — e.g. the MCP server launched in your home directory but you want to work on ~/myproject. The server loads the graph from the new directory's .rpg/graph.json if present, resets all session state (lifting sessions, auto-sync markers, pending routing), and points every subsequent tool call at the new root. Path is tilde-expanded and canonicalized.", annotations( destructive_hint = false, idempotent_hint = true, @@ -753,6 +753,19 @@ impl RpgServer { self.pending_routing.write().await.clear(); clear_pending_routing(&self.project_root().await); + // Prune the drift-tracking set against the new graph. build_rpg + // preserves features for entities whose IDs survive the rebuild, + // so a stale entity that still exists is correctly still stale. + // But IDs removed in the rebuild should drop out of the set so it + // doesn't accumulate dead references over many rebuilds. + { + let graph_guard = self.graph.read().await; + if let Some(ref g) = *graph_guard { + let mut stale = self.stale_entity_ids.write().await; + stale.retain(|id| g.entities.contains_key(id)); + } + } + let lang_display = if languages.len() == 1 { languages[0].name().to_string() } else { @@ -945,6 +958,19 @@ impl RpgServer { let project_root = self.project_root().await.clone(); let scope_owned = scope.to_string(); + // Snapshot pre-lift fingerprints so we can drain re-lifted IDs + // from `stale_entity_ids` afterwards. For a non-`*` scope the + // pipeline freshens features for every in-scope entity (ignoring + // the "already has features" gate that resolve_scope=`*` enforces), + // so any in-scope stale entity is no longer stale once the + // pipeline returns. Without this drain, lifting_status would + // keep reporting the count. + let pre_lift_features: std::collections::HashMap> = graph + .entities + .iter() + .map(|(id, e)| (id.clone(), e.semantic_features.clone())) + .collect(); + // Run the blocking pipeline on the current thread (tells tokio we're blocking). // This is safe because MCP stdio is serial — no concurrent requests. let report = tokio::task::block_in_place(|| { @@ -962,8 +988,31 @@ impl RpgServer { }) .map_err(|e| format!("Lift failed: {}", e))?; + // Drain stale entries for entities the pipeline freshened + // (features changed or transitioned from empty → present). + // Done while still holding the graph write lock so the + // before/after snapshot is consistent. 
+ let refreshed_ids: Vec = graph + .entities + .iter() + .filter_map(|(id, e)| { + let prev = pre_lift_features.get(id); + let changed = match prev { + None => !e.semantic_features.is_empty(), + Some(p) => p != &e.semantic_features, + }; + if changed { Some(id.clone()) } else { None } + }) + .collect(); drop(guard); + if !refreshed_ids.is_empty() { + let mut stale = self.stale_entity_ids.write().await; + for id in &refreshed_ids { + stale.remove(id); + } + } + // Clear sessions — entity list changed *self.lifting_session.write().await = None; *self.hierarchy_session.write().await = None; @@ -1288,7 +1337,7 @@ impl RpgServer { let batch_tokens = self.config.read().await.encoding.max_batch_tokens; let approx_total_k = (total_batches * batch_tokens).div_ceil(1000); output.push_str(&format!( - "\nNOTE: {} batches queued (~{}K tokens of source total). If your runtime supports sub-agent dispatch or a cheaper model, stop here — do not request batches 2..N — and invoke `lifting_status` for the delegation pattern. Continue the sequential loop only if no dispatch is available.\n\n", + "\nNOTE: {} batches queued (~{}K tokens of source total). If your runtime supports sub-agent dispatch or a cheaper model, stop here — do not request further batches in this context — and invoke `lifting_status` for the delegation pattern. Continue the sequential loop only if no dispatch is available.\n\n", total_batches, approx_total_k, )); } From 816b678f92f91b7a567f31c38b50d2ec5ddbe97c Mon Sep 17 00:00:00 2001 From: userFRM Date: Tue, 14 Apr 2026 20:39:50 +0200 Subject: [PATCH 19/19] polish: close remaining LOW findings from second independent audit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - auto_lift drain now pulls the in-scope entity IDs from resolve_scope and drains them unconditionally, instead of diffing pre/post features. The diff-gated approach missed the identical-features edge case (a cosmetic edit re-lifts to the same output and stale would persist). - Misleading comment in auto_lift claiming "MCP stdio is serial — no concurrent requests" was factually wrong about rmcp 0.14.0 (which does tokio::spawn per request). Corrected to cite the actual synchronization primitives: the `lift_in_progress` atomic and the graph write lock held across the pipeline. - Dropped the "clean exemplar" rationale from the auto_sync_if_stale lock-order comment. The ordering change still holds, but the claim that it exemplifies canonical order was inconsistent — `update_rpg` and `set_project_root` use different inner-lock orders and the doc's explicit escape hatch (statement-per-lock) covers all three. - Rewrote CHANGELOG 0.8.3 buckets. The Added bucket had swallowed many items that are Fixed (prior bugs) or Changed (behavioral modifications to existing features). Re-sorted: new capabilities in Added, behavioral changes in Changed, bug fixes in Fixed. The Keep-a-Changelog semantics were violated by treating every new bullet as Added. Verified with cargo fmt, cargo clippy -p rpg-mcp --all-targets -- -D warnings, cargo test -p rpg-mcp. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 274 +++++++++++++++++------------------ crates/rpg-mcp/src/server.rs | 9 +- crates/rpg-mcp/src/tools.rs | 59 ++++---- 3 files changed, 164 insertions(+), 178 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02bc60c..178a222 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,184 +9,180 @@ and this project adheres to [Semantic Versioning](https://semver.org/). 
### Added -- `build_rpg` response now emits an action-oriented NEXT STEP directing the - agent to lift the graph immediately — small scope lifts in the current - context, large scope dispatches a sub-agent. Previously a passive - "Tip: use get_entities_for_lifting" hint that agents routinely skipped. -- Canonical lock-order invariant documented on the `RpgServer` struct so - reviewers don't have to re-derive it from scattered call sites. -- `build_semantic_hierarchy` sharded init now takes `graph.read()` to - compute clusters *before* acquiring `hierarchy_session.write()`, - respecting the declared graph-before-session order. Previously the - sharded path acquired `hierarchy_session.write()` first and then - `graph.read()`, which formed a deadlock cycle with `update_rpg`'s - graph-then-session order under concurrent scheduling. -- `build_batch_0_domain_discovery` and `build_cluster_batch` now take - `&RPGraph` and (where applicable) clusters as parameters instead of - re-reading `self.graph` / `self.hierarchy_session`. The previous - design left two TOCTOU windows: a session-clear race where a - concurrent `build_rpg`/`update_rpg` could panic the helper on - `session_guard.as_mut().unwrap()`, and a graph-replace race where a - concurrent `set_project_root` could panic on `graph.as_ref().unwrap()`. - Callers snapshot everything they need under the appropriate locks and - pass references through; the helpers no longer touch server state - beyond the project root. -- `reload_rpg` now prunes the stale-tracking set against the newly - loaded graph instead of clearing it wholesale. The CLI / isolated - sub-agent re-lift flow only refreshes entities with no features — - stale entities (features present but outdated) survive it, so - clearing the set would let `lifting_status` report 100% coverage - while re-lift work remained. -- Server startup auto-update now feeds `summary.modified_entity_ids` - into the stale-tracking set. Previously the result was discarded: - modifications between the last lift and a session restart silently - dropped off the dashboard, even though the same modifications would - have been tracked correctly if they happened mid-session via - `auto_sync_if_stale` or `update_rpg`. -- `auto_lift` now drains stale entries for the entities its pipeline - re-lifted. The non-`*` scope path freshens features for every - in-scope entity (including ones that were stale), so stale entries - for those IDs become invalid after the call. Without the drain, - `lifting_status` would keep counting them. -- `build_rpg` now prunes the stale-tracking set against the newly built - graph so dead entity IDs don't accumulate across rebuilds. -- `set_project_root` tool description is no longer Claude-Code-specific - in its example. Reads as `MCP server launched in your home directory - but you want to work on ~/myproject` — applies to every runtime. -- `get_entities_for_lifting` batch-0 dispatch NOTE no longer references - "batches 2..N" (off-by-one against the 0-based `batch_index` parameter - it actually takes). Reads as "do not request further batches in this - context". -- `auto_sync_if_stale` reorders its inner writes to follow the canonical - lock rank (stale before auto-sync markers). Functionally equivalent - because each write is statement-per-lock, but matches the declared - invariant so the file reads as a clean exemplar. -- `set_project_root` no longer preserves the previous project's - in-memory config when the new project has a malformed - `.rpg/config.toml`. 
It now falls back to `RpgConfig::default()` on - parse failure — project switch must not silently cross-contaminate - encoding/batch settings. (Same-project `reload_rpg` keeps the - previous config on parse failure, which is correct for that flow.) -- `lifting_status` tracks stale-feature drift across calls. A persistent - per-server set records entities whose source was modified after they - were lifted; the dashboard reports `stale_features: N entities modified - since last lift` and the NEXT STEP state machine prompts re-lift even - when coverage is 100%. +- `lifting_status` now tracks stale-feature drift across calls. A + persistent per-server set records entities whose source was modified + after they were lifted; the dashboard reports `stale_features: N + entities modified since last lift` and the NEXT STEP state machine + prompts re-lift even when coverage is 100%. - `get_entities_for_lifting(scope="*")` now returns stale entities alongside unlifted ones, so a single call covers both "never lifted" - and "lifted-but-outdated" work. Stale entities bypass the auto-lift - skip check and flow through the normal LLM batch loop. + and "lifted-but-outdated" work. - `lifting_status` emits a sub-agent dispatch recommendation when the remaining work (unlifted + stale) is ≥100 entities. The response contains `LOOP` / `DISPATCH` / `FALLBACK` blocks so callers delegate - directly without first loading a batch of source into their own context. -- `get_entities_for_lifting` batch-0 emits a one-line dispatch hint when - ≥10 token-aware batches are queued, pointing back to `lifting_status` - for the full recommendation. -- `submit_lift_results` NEXT action becomes scale-aware — when remaining - work ≥100 entities, it redirects the caller to `lifting_status` instead - of encouraging another foreground batch. -- Auto-sync notice now prescribes a verb. It distinguishes per-update - delta from pre-existing backlog and separates new-entity drift from - stale-feature drift, so an agent that commits code and sees the notice - is told to re-lift rather than informed of a count. -- `server_instructions.md`: new `USE RPG FIRST` top section with a + directly without first loading a batch of source into their own + context. +- `get_entities_for_lifting` batch-0 emits a one-line dispatch hint + when ≥10 token-aware batches are queued, pointing back to + `lifting_status` for the full recommendation. +- `submit_lift_results` NEXT action is now scale-aware — when remaining + work ≥100 entities, it redirects the caller to `lifting_status` + instead of encouraging another foreground batch. +- `build_rpg` response now emits an action-oriented NEXT STEP directing + the agent to lift immediately. Small scope lifts inline; large scope + dispatches a sub-agent with the LOOP pattern embedded. +- New `USE RPG FIRST` top section in `server_instructions.md` with a mapping table from shell patterns (`grep -r`, `cat`, `find`, `wc -l`, chained greps) to the RPG tool that replaces them. -- `server_instructions.md`: new `DRIFT MAINTENANCE` section explaining - the three auto-sync notice variants and framing re-lift as part of - definition-of-done for any task that wrote code. +- New `DRIFT MAINTENANCE` section in `server_instructions.md` + explaining the three auto-sync notice variants and framing re-lift + as part of definition-of-done for any task that wrote code. 
- Tool descriptions for `search_node`, `fetch_node`, `explore_rpg`, `rpg_info`, `semantic_snapshot`, `context_pack`, `impact_radius`, `plan_change`, `analyze_health`, `detect_cycles`, `find_paths`, and `slice_between` now open with a `PREFER THIS OVER ...` marker naming the shell command or workflow they replace. -- `.claude/skills/rpg/SKILL.md` and `README.md` carry the same RPG-first - mapping table as the server prompt. -- `reload_config_with_warning` helper (`server.rs`) distinguishes missing - `.rpg/config.toml` (silent default) from malformed TOML (stderr warning, - keeps previous in-memory config). -- Crate-visible `LARGE_SCOPE_ENTITIES` (100) and `LARGE_SCOPE_BATCHES` (10) - constants replace duplicated magic numbers. Doc comments describe the +- `.claude/skills/rpg/SKILL.md` and `README.md` carry the same + RPG-first mapping table as the server prompt. +- Crate-visible `LARGE_SCOPE_ENTITIES` (100) and `LARGE_SCOPE_BATCHES` + (10) constants replace duplicated magic numbers across + `server.rs`/`tools.rs` with doc comments describing the heuristic-vs-authoritative relationship. +- Canonical lock-order invariant documented on the `RpgServer` struct + so reviewers don't have to re-derive it from scattered call sites. +- `reload_config_with_warning` helper on `RpgServer` that distinguishes + missing `.rpg/config.toml` (silent default) from malformed TOML + (stderr warning, keeps previous in-memory config). ### Changed - `lifting_status` NEXT STEP is runtime-neutral. No specific runtime's - dispatch syntax appears in the response; callers use whatever sub-agent - or cheaper-model mechanism their runtime exposes. Explicit fallbacks: - scoped lifting for callers with no delegation mechanism, and - `rpg-encoder lift --provider anthropic|openai` for callers with an API - key and no sub-agent support. + dispatch syntax appears in the response; callers use whatever + sub-agent or cheaper-model mechanism their runtime exposes. Explicit + fallbacks: scoped lifting for callers with no delegation mechanism, + and `rpg-encoder lift --provider anthropic|openai` for callers with + an API key and no sub-agent support. - Batch-size estimates in NEXT STEP messages read from the live `encoding.max_batch_tokens` config instead of a hard-coded `~12K` figure, so the estimate scales when the user overrides the budget. - `NEXT STEP:` remains a single parseable line; dispatch detail is emitted in labeled blocks immediately below. -- `server_instructions.md` LIFTING FLOW sub-section rewritten and - shortened. -- `get_routing_candidates` response header no longer includes the graph - revision hash — it moved to the NEXT_ACTION block at the bottom. Keeps - the stable preamble (instructions + entity table) cache-eligible while - still surfacing the revision where the agent needs to read it back. -- `update_rpg` now feeds `summary.modified_entity_ids` into the - stale-tracking set so its `needs_relift: N` reply aligns with what - `lifting_status` and `get_entities_for_lifting(scope="*")` report. -- `reload_rpg` clears the stale-tracking set only after the graph reload - succeeds, so a transient read error no longer wipes the drift backlog - while leaving the previous graph in memory. -- `set_project_root` resets the stale-tracking set as part of the - project-scoped state reset. 
+- Auto-sync notice now prescribes a verb: it distinguishes per-update + delta from pre-existing backlog and separates new-entity drift from + stale-feature drift, so an agent that commits code and sees the + notice is told to re-lift rather than informed of a count. - CLI fallback in large-scope guidance is gated to cases with actual unlifted work, with a note that `rpg-encoder lift` does not re-lift stale entities (it filters to entities with no features). +- `get_routing_candidates` response header no longer includes the + graph revision hash — it moved to the NEXT_ACTION block at the + bottom. Keeps the stable preamble (instructions + entity table) + cache-eligible while still surfacing the revision where the agent + needs to read it back. +- `server_instructions.md` LIFTING FLOW sub-section rewritten and + shortened. +- `set_project_root` on project switch now loads the new project's + `.rpg/config.toml` independently; on parse failure it falls back to + `RpgConfig::default()` rather than preserving the previous project's + config. Same-project `reload_rpg` preserves the previous config on + parse failure (different flow, different correctness requirement). ### Fixed - `lifting_status` previously reported `Graph is complete` as soon as - every entity had some features, ignoring stale features from modified - sources. The state machine now considers `remaining + stale_features` - combined. -- `get_entities_for_lifting(scope="*")` previously returned zero entities - when all drift was stale (features present, sources modified) because - `resolve_scope` filters to entities with no features. + every entity had some features, ignoring stale features from + modified sources. The state machine now considers + `remaining + stale_features` combined. +- `get_entities_for_lifting(scope="*")` previously returned zero + entities when all drift was stale (features present, sources + modified) because `resolve_scope` filters to entities with no + features. It now augments the resolved scope with tracked stale + entities and routes them through the LLM loop. - Auto-sync notice previously conflated per-update delta with global backlog, so a one-line edit on a partially-lifted repo could claim "50 new entities unlifted" when only 1 was actually new. -- `finalize_lifting` fallback guidance previously said to call it after - each scoped subtree. That auto-routes pending entities against - incomplete signals and locks the hierarchy early. Guidance now says - to call `finalize_lifting` once after all scopes complete. -- `rpg-encoder lift --provider ...` (CLI fallback) left the MCP server - on a stale in-memory graph. All docs that mention the CLI fallback - now specify that the caller must call `reload_rpg` afterward. +- `finalize_lifting` fallback guidance previously said to call it + after each scoped subtree. That auto-routes pending entities + against incomplete signals and locks the hierarchy early. Guidance + now says to call `finalize_lifting` once after all scopes complete. +- `rpg-encoder lift --provider ...` (CLI fallback) left the MCP + server on a stale in-memory graph. All docs that mention the CLI + fallback now specify that the caller must call `reload_rpg` + afterward. - `set_project_root` and `reload_rpg` previously used - `unwrap_or_default()` on config loads, collapsing missing-config and - malformed-TOML into identical silent behavior. Malformed TOML now - surfaces a stderr warning and leaves the in-memory config untouched. 
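A sketch of that missing-vs-malformed split, with a hypothetical `Config` type
and an inline `toml` parse rather than the real `reload_config_with_warning`
helper:

```rust
use std::path::Path;

#[derive(Default, serde::Deserialize)]
struct Config {}

fn load_project_config(root: &Path, previous: Config) -> Config {
    let path = root.join(".rpg/config.toml");
    match std::fs::read_to_string(&path) {
        // Missing file: silent fallback to defaults.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Config::default(),
        // Other read errors: warn, keep what we had.
        Err(e) => {
            eprintln!("rpg: could not read {}: {}", path.display(), e);
            previous
        }
        // Present but malformed: warn and keep the previous in-memory
        // config — the same-project reload semantics described above.
        Ok(text) => match toml::from_str(&text) {
            Ok(cfg) => cfg,
            Err(e) => {
                eprintln!("rpg: malformed {}: {}", path.display(), e);
                previous
            }
        },
    }
}
```

On the project-switch path the malformed arm would return `Config::default()`
instead of `previous`, per the `set_project_root` bullet above.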
-- `set_project_root` failed to refresh `self.config` on project switch; - the server kept serving the previous project's encoding settings. + `unwrap_or_default()` on config loads, collapsing missing-config + and malformed-TOML into identical silent behavior. +- `set_project_root` failed to refresh `self.config` on project + switch; the server kept serving the previous project's encoding + settings. - `lifting_status` large-scope recommendation previously ran off raw unlifted count, before auto-lift had reduced the set. On repos full of trivial entities (getters, setters, constructors) it could recommend delegation for ~0 LLM calls. The large-scope branch now - hedges — it signals likely-large and defers the authoritative check - to the post-auto-lift batch count in `get_entities_for_lifting`. -- `rpg_info` error wording ("No RPG found") was miscited as a friendly - status string; corrected to "any RPG tool returns 'No RPG found'". -- `build_rpg` NEXT STEP previously counted `Module` entities in its - "unlifted" total, while `lifting_status` and `get_entities_for_lifting` - exclude them. The two could disagree by hundreds of entities on large - codebases, tripping the delegation threshold in `build_rpg` when - `lifting_status` would still recommend foreground lifting. Both paths - now use `lifting_coverage()` (non-module) for the count. + signals likely-large and defers the authoritative check to the + post-auto-lift batch count in `get_entities_for_lifting`. +- `rpg_info` error wording ("No RPG found") was miscited as a + friendly status string; corrected to "any RPG tool returns 'No RPG + found'". +- `build_rpg` NEXT STEP and `lifted: X/Y` header previously counted + `Module` entities against the unlifted total, while + `lifting_status` and `get_entities_for_lifting` exclude them. The + two could disagree by hundreds of entities on large codebases, + tripping the delegation threshold in `build_rpg` when + `lifting_status` would still recommend foreground lifting. Both + paths now use `lifting_coverage()` (non-module) for the count, and + the header reads `liftable_entities: X/Y`. - `submit_lift_results` previously emitted `DONE` as soon as coverage reached 100%, which could terminate a stale-only re-lift loop after - batch 1 while later batches were still queued. The NEXT/DONE branch - now counts unlifted + stale remaining. -- Auto-lifted features for entities that were previously flagged stale - now drain the stale-tracking set, so the count doesn't inflate when - the auto-lifter writes fresh features directly. + batch 1 while later batches were still queued. The NEXT/DONE + branch now counts unlifted + stale remaining. +- `update_rpg` now feeds `summary.modified_entity_ids` into the + stale-tracking set so its `needs_relift: N` reply aligns with what + `lifting_status` and `get_entities_for_lifting(scope="*")` report. +- Server startup auto-update now feeds `modified_entity_ids` into + the stale-tracking set and seeds the auto-sync changeset hash for a + clean workdir. Previously modifications between the last lift and + a session restart silently dropped off the dashboard. +- `auto_lift` on a non-`*` scope now drains stale entries for every + in-scope entity. The pipeline freshens features for each + regardless of existing state, so stale markers for those IDs are + invalid after the call; the unconditional drain also handles the + identical-features case where a cosmetic edit re-lifts to the + same output. 
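Taken together, the stale-set bookkeeping in these bullets reduces to three
operations — record, prune, drain. A sketch with stand-in types, not the
server's real graph or tracking field:

```rust
use std::collections::{HashMap, HashSet};

struct Graph { entities: HashMap<String, ()> }

struct StaleTracking { ids: HashSet<String> }

impl StaleTracking {
    // Sync paths (update_rpg, auto_sync_if_stale, startup): record
    // entities whose source changed after they were lifted, then
    // prune — don't clear — so IDs removed from the graph drop out
    // while the remaining backlog stays visible.
    fn record_modified(&mut self, modified: &[String], graph: &Graph) {
        self.ids.extend(modified.iter().cloned());
        self.ids.retain(|id| graph.entities.contains_key(id));
    }

    // Lift paths (auto_lift with an explicit scope): everything the
    // pipeline touched is freshly lifted, even if the new features
    // happen to be identical, so drain unconditionally by ID.
    fn drain_lifted(&mut self, in_scope: &HashSet<String>) {
        self.ids.retain(|id| !in_scope.contains(id));
    }
}
```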
+- Auto-lifted features for entities previously flagged stale now + drain the stale-tracking set inline in + `get_entities_for_lifting`, so the count doesn't inflate when the + auto-lifter writes fresh features directly. +- `reload_rpg` now prunes the stale-tracking set against the newly + loaded graph rather than clearing it wholesale. The CLI / isolated + sub-agent re-lift flow only refreshes entities with no features — + stale entities survive it, so clearing would let `lifting_status` + report 100% coverage while re-lift work remained. +- `reload_rpg` drift-tracking reset now sits on the success path, + after `storage::load` returns `Ok`. Transient read errors no longer + wipe the backlog while leaving the previous graph in memory. +- `build_rpg` now prunes the stale-tracking set against the newly + built graph so dead entity IDs don't accumulate across rebuilds. +- `build_semantic_hierarchy` sharded init no longer acquires + `hierarchy_session.write()` before `graph.read()`. The original + order formed a deadlock cycle with `update_rpg`'s + graph-then-session order under concurrent scheduling. The init + path now collapses decision + snapshot into a single + `hierarchy_session.write()` held under `graph.read()` and packages + the work into an `Action` enum so there's no peek-then-trust. +- `build_batch_0_domain_discovery` and `build_cluster_batch` now take + `&RPGraph` and (where applicable) clusters as parameters instead + of re-reading `self.graph` / `self.hierarchy_session`. Closes two + TOCTOU windows: a session-clear race where a concurrent + `build_rpg`/`update_rpg` could panic on `session.as_mut().unwrap()`, + and a graph-replace race where a concurrent `set_project_root` + could panic on `graph.as_ref().unwrap()`. +- `set_project_root` tool description is no longer Claude-Code-specific + in its example; reads runtime-neutral. +- `get_entities_for_lifting` batch-0 dispatch NOTE no longer + references "batches 2..N" (off-by-one against the 0-based + `batch_index` parameter). Reads "do not request further batches in + this context". ## [0.8.2] - 2026-04-14 diff --git a/crates/rpg-mcp/src/server.rs b/crates/rpg-mcp/src/server.rs index fac65c3..9132753 100644 --- a/crates/rpg-mcp/src/server.rs +++ b/crates/rpg-mcp/src/server.rs @@ -329,11 +329,10 @@ impl RpgServer { // set, lifting_status would report "100% coverage" while // search_node returns outdated features. // - // Ordered before the last_auto_sync_* writes to follow the - // canonical lock rank declared on RpgServer (stale=3 before - // auto-sync markers=5). Each statement releases its write - // lock before the next is acquired, so order would be moot, - // but matching rank keeps the file readable as an exemplar. + // Each inner write below is statement-per-lock — no two + // inner locks are held at once while we're also holding + // graph.write(), so the order between them is irrelevant + // for correctness (see the lock-order doc on RpgServer). { let mut stale = self.stale_entity_ids.write().await; for id in &summary.modified_entity_ids { diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index edf7b86..74d8190 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -958,21 +958,27 @@ impl RpgServer { let project_root = self.project_root().await.clone(); let scope_owned = scope.to_string(); - // Snapshot pre-lift fingerprints so we can drain re-lifted IDs - // from `stale_entity_ids` afterwards. 
For a non-`*` scope the - // pipeline freshens features for every in-scope entity (ignoring - // the "already has features" gate that resolve_scope=`*` enforces), - // so any in-scope stale entity is no longer stale once the - // pipeline returns. Without this drain, lifting_status would - // keep reporting the count. - let pre_lift_features: std::collections::HashMap> = graph - .entities - .iter() - .map(|(id, e)| (id.clone(), e.semantic_features.clone())) - .collect(); + // Compute the in-scope entity IDs up front so we can drain + // them from `stale_entity_ids` after the pipeline runs. For a + // non-`*` scope the pipeline freshens features for every + // in-scope entity (the `*` scope auto-filters to feature-empty + // entities, but explicit scopes don't), so any stale entity + // in scope is no longer stale once the pipeline returns. We + // drain *unconditionally* by ID rather than diffing features + // before/after, because a deterministic re-lift can produce + // identical features for a cosmetic source change — the + // entity is still freshly lifted, just to the same value. + let in_scope_ids: HashSet = { + let lift_scope = rpg_encoder::lift::resolve_scope(graph, scope); + lift_scope.entity_ids.into_iter().collect() + }; - // Run the blocking pipeline on the current thread (tells tokio we're blocking). - // This is safe because MCP stdio is serial — no concurrent requests. + // Run the blocking pipeline on the current thread (tells tokio + // we're blocking). Safe because (1) the `lift_in_progress` + // atomic above rejects concurrent `auto_lift` calls, and + // (2) the graph write lock we hold below serializes against + // every other tool that touches the graph for the pipeline's + // duration. let report = tokio::task::block_in_place(|| { let config = rpg_lift::LiftConfig { provider: provider.as_ref(), @@ -988,29 +994,14 @@ impl RpgServer { }) .map_err(|e| format!("Lift failed: {}", e))?; - // Drain stale entries for entities the pipeline freshened - // (features changed or transitioned from empty → present). - // Done while still holding the graph write lock so the - // before/after snapshot is consistent. - let refreshed_ids: Vec = graph - .entities - .iter() - .filter_map(|(id, e)| { - let prev = pre_lift_features.get(id); - let changed = match prev { - None => !e.semantic_features.is_empty(), - Some(p) => p != &e.semantic_features, - }; - if changed { Some(id.clone()) } else { None } - }) - .collect(); drop(guard); - if !refreshed_ids.is_empty() { + // Drain stale tracking for every in-scope ID. After the + // pipeline, those entities have authoritative features (LLM + // or auto-lift), regardless of whether the features changed. + if !in_scope_ids.is_empty() { let mut stale = self.stale_entity_ids.write().await; - for id in &refreshed_ids { - stale.remove(id); - } + stale.retain(|id| !in_scope_ids.contains(id)); } // Clear sessions — entity list changed
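
For reference, the concurrency guard cited in the final hunk's comment — the
`lift_in_progress` atomic plus `block_in_place` — has roughly this shape. A
sketch with an assumed server struct and a stand-in pipeline, not the actual
`auto_lift` tool:

```rust
use std::sync::atomic::{AtomicBool, Ordering};

struct Server { lift_in_progress: AtomicBool }

impl Server {
    async fn auto_lift_guarded(&self) -> Result<String, String> {
        // Only the caller that flips false -> true proceeds; a second
        // concurrent call is rejected immediately.
        if self
            .lift_in_progress
            .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
            .is_err()
        {
            return Err("a lift is already in progress".into());
        }
        // block_in_place marks this worker thread as blocking for the
        // duration of the synchronous pipeline (multi-thread runtime only).
        let report = tokio::task::block_in_place(|| {
            // ... long, synchronous lift pipeline ...
            "lift complete".to_string()
        });
        // Real code should also clear the flag on every error path
        // (e.g. with a drop guard); elided here for brevity.
        self.lift_in_progress.store(false, Ordering::SeqCst);
        Ok(report)
    }
}
```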