From 1a080a006f8b136b67ae47a5d0c4083045672d96 Mon Sep 17 00:00:00 2001 From: ozymandiashh <234437643+ozymandiashh@users.noreply.github.com> Date: Tue, 5 May 2026 04:13:04 +0300 Subject: [PATCH 1/3] feat(optimize): MCP tool coverage detector with cache-aware costing Adds a per-tool optimizer finding for MCP servers whose schema is loaded on every turn but rarely invoked. Builds on the existing server-level `detectUnusedMcp` (zero invocations) by reporting partial-use cases: "loaded 54 tools, called 0" or "loaded 26 tools, called 2 (8% coverage)". Inventory comes from Claude Code's JSONL `attachment.deferred_tools_delta` entries: `addedNames` lists the exact tools available at that turn, including every fully-qualified `mcp__<server>__<tool>` name. We union across all delta entries in a session (not just the first) because tool availability can change mid-session when the user reloads MCP config or a subagent inherits a different tool set. Names that don't match the `mcp__<server>__<tool>` shape with both segments non-empty are rejected at extraction so downstream `split('__')` consumers can't be poisoned. Token-savings estimates are cache-aware. MCP tool schemas live in the cached prefix of the system prompt: a session pays the full input price on each cache-creation turn (rebuilds happen every ~5 minutes of inactivity) and the cache-read discount on subsequent turns. Each call's contribution is capped at its observed `cacheCreationInputTokens` / `cacheReadInputTokens` so we never claim more MCP overhead than the call's own cache buckets could contain. When multiple servers are flagged, costing happens in a single combined pass: the per-call cap applies to the total unused-schema budget across all flagged servers, not per server. Two flagged servers cannot both independently claim the same call's cache bucket, which would otherwise overstate `tokensSaved` and misclassify findings as high impact.
A session counts toward `loadedSessions` (and toward the cost estimate) only if its observed inventory included the server. Pure invocation-only sessions, where the server appears in `mcpBreakdown` or `call.mcpTools` without any matching `deferred_tools_delta`, do not satisfy the `>= 2 sessions` threshold on their own. The same invariant applies in `estimateMcpSchemaCost` so the two passes agree. Coverage is computed against the inventory only: invocations of names not present in any observed inventory (older config, hallucinated tool, typo) do not inflate `toolsInvoked` and cannot drive `unusedCount` negative. `toolsInvoked` is derived as `inventory.size - unusedTools.length` to keep both numbers consistent. `detectUnusedMcp` and the new detector are explicitly disjoint: `detectUnusedMcp` skips servers that the coverage detector will report, not every server that happens to be in any inventory, so a small inventoried-but-uninvoked server below the coverage thresholds still gets flagged as "configured but never called." Thresholds for the coverage finding: - > 10 tools available (small servers are noise) - < 20% coverage - >= 2 sessions with observed inventory - High impact when total effective tokens >= 200_000 or >= 3 servers flagged Smoke-tested on a real account: 7 servers flagged across 93 sessions (`office-word-mcp` 0/54, `notebooklm-mcp` 0/38, `office-ppt-mcp` 0/37, `excel-mcp-server` 0/25, `github-mcp-server` 2/26, `peekaboo` 3/22, plus `claude_ai_Asana`). Combined-cap costing keeps `tokensSaved` honest. Changes: - src/types.ts: optional `mcpInventory: string[]` on `SessionSummary`. Provider-agnostic field; currently populated only by the Claude parser. - src/parser.ts: `extractMcpInventory` walks all entries, validates fully-qualified names, returns sorted unique list. `buildSessionSummary` passes it through; field is omitted when empty so JSON exports stay clean. 
- src/optimize.ts: `aggregateMcpCoverage`, `estimateMcpSchemaCost` (single- and multi-server signatures), `detectMcpToolCoverage`. Wired into `scanAndDetect`. `detectUnusedMcp` updated to be disjoint from the new detector. - tests/mcp-coverage.test.ts: 23 cases covering aggregation, costing, combined-cap behaviour, threshold gates, invocation-only-session filtering, foreign-tool invocations, cache rebuild events, write+read on the same call, multi-server pluralisation. - tests/parser-mcp-inventory.test.ts: 12 cases for the JSONL extractor including malformed name rejection and tolerant attachment parsing. - CHANGELOG.md: entry under Unreleased / Added (CLI). Closes #2 --- CHANGELOG.md | 13 + src/optimize.ts | 322 +++++++++++++++++++++ src/parser.ts | 54 +++- src/types.ts | 6 + tests/mcp-coverage.test.ts | 450 +++++++++++++++++++++++++++++ tests/parser-mcp-inventory.test.ts | 126 ++++++++ 6 files changed, 970 insertions(+), 1 deletion(-) create mode 100644 tests/mcp-coverage.test.ts create mode 100644 tests/parser-mcp-inventory.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index b31e30b7..e5022c49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # Changelog +## Unreleased + +### Added (CLI) +- **MCP tool coverage detector.** New `optimize` finding flags MCP servers + whose tool inventory is largely unused. Inventory is observed from the + Claude `deferred_tools_delta` JSONL attachments (exact tool names per + session) instead of guessed at five tools per server. Token-savings + estimates are cache-aware: schema bytes pay full input price on each + cache-creation turn of a session, then carry at the cache-read discount + on subsequent turns, capped per call so we never claim more overhead + than the call's own cache buckets could contain. Threshold: + >10 tools available, <20% coverage, observed in ≥2 sessions. Closes #2.
+ ## 0.9.6 - 2026-05-03 ### Added (CLI) diff --git a/src/optimize.ts b/src/optimize.ts index 7077b297..7882660e 100644 --- a/src/optimize.ts +++ b/src/optimize.ts @@ -53,6 +53,18 @@ const LOW_RATIO_MEDIUM_THRESHOLD = 3 const MIN_API_CALLS_FOR_CACHE = 10 const CACHE_EXCESS_HIGH_THRESHOLD = 15000 const UNUSED_MCP_HIGH_THRESHOLD = 3 +// MCP tool coverage detector thresholds. A server only earns a finding when +// every condition holds: the inventory is large enough to matter, real-world +// usage is poor, and we observed it in enough sessions to trust the signal. +const MCP_COVERAGE_MIN_TOOLS = 10 +const MCP_COVERAGE_MIN_SESSIONS = 2 +const MCP_COVERAGE_LOW_THRESHOLD = 0.20 +const MCP_COVERAGE_HIGH_IMPACT_TOKENS = 200_000 +// Anthropic prices cached input reads at roughly 10% of fresh input. We use +// this to keep "ongoing" overhead estimates honest: most MCP schema bytes +// live in the cached prefix and only get charged at the discount rate after +// the first turn of a session. +const CACHE_READ_DISCOUNT = 0.10 const GHOST_AGENTS_HIGH_THRESHOLD = 5 const GHOST_AGENTS_MEDIUM_THRESHOLD = 2 const GHOST_SKILLS_HIGH_THRESHOLD = 10 @@ -477,6 +489,298 @@ export function detectDuplicateReads(calls: ToolCall[], dateRange?: DateRange): } } +/** + * Per-server breakdown of MCP tool inventory vs invocations, computed from the + * `mcpInventory` field captured by the Claude parser. + * + * Each session that loaded a server contributes its observed tool list to + * the union for that server. Invocations come from the existing + * `mcpBreakdown` per-call counts plus the parser's `call.mcpTools` stream. + */ +export type McpServerCoverage = { + server: string + toolsAvailable: number + toolsInvoked: number + unusedTools: string[] + invocations: number + loadedSessions: number + coverageRatio: number +} + +/** + * Aggregate MCP inventory and invocations across the projects in scope. + * + * Returns one entry per `mcp__<server>__*` namespace observed in any + * session's `mcpInventory`.
Counts of invocations come from + * `session.mcpBreakdown` (per-server call totals already maintained by the + * parser). + */ +export function aggregateMcpCoverage(projects: ProjectSummary[]): McpServerCoverage[] { + type ServerAcc = { + inventory: Set + invokedTools: Set + invocations: number + loadedSessions: number + } + const servers = new Map() + + function getOrInit(server: string): ServerAcc { + let acc = servers.get(server) + if (!acc) { + acc = { inventory: new Set(), invokedTools: new Set(), invocations: 0, loadedSessions: 0 } + servers.set(server, acc) + } + return acc + } + + for (const project of projects) { + for (const session of project.sessions) { + // Only sessions with an observed inventory count toward `loadedSessions`. + // Pure invocation-only sessions (server seen via `call.mcpTools` or + // `session.mcpBreakdown` without any matching `deferred_tools_delta`) + // could otherwise satisfy the `MCP_COVERAGE_MIN_SESSIONS` threshold + // without giving us evidence that the schema was actually loaded. + const inventoriedServers = new Set() + const sessionInvoked = new Map>() + + // Inventory: union of tools observed available in this session. + for (const fqn of session.mcpInventory ?? []) { + const parts = fqn.split('__') + if (parts.length < 3 || parts[0] !== 'mcp') continue + const server = parts[1] + if (!server) continue + const tool = parts.slice(2).join('__') + if (!tool) continue + const acc = getOrInit(server) + acc.inventory.add(fqn) + inventoriedServers.add(server) + } + + // Invoked tools: walk turns to collect per-tool invocations. We can't + // get this from session.mcpBreakdown alone because that's keyed by + // server, not tool. 
+ for (const turn of session.turns) { + for (const call of turn.assistantCalls) { + for (const fqn of call.mcpTools) { + const parts = fqn.split('__') + if (parts.length < 3 || parts[0] !== 'mcp') continue + const server = parts[1] + if (!server) continue + let invoked = sessionInvoked.get(server) + if (!invoked) { + invoked = new Set() + sessionInvoked.set(server, invoked) + } + invoked.add(fqn) + } + } + } + + // Invocation totals: trust mcpBreakdown which was already aggregated + // turn-by-turn, including any invocations the inventory pass missed. + for (const [server, data] of Object.entries(session.mcpBreakdown)) { + const acc = getOrInit(server) + acc.invocations += data.calls + } + + for (const [server, invoked] of sessionInvoked) { + const acc = getOrInit(server) + for (const fqn of invoked) acc.invokedTools.add(fqn) + } + + for (const server of inventoriedServers) { + getOrInit(server).loadedSessions += 1 + } + } + } + + const result: McpServerCoverage[] = [] + for (const [server, acc] of servers) { + if (acc.inventory.size === 0) continue + // Coverage is only meaningful against tools we actually observed in the + // inventory: invocations of tools never inventoried (older config, typo, + // etc.) would otherwise inflate the numerator and could even drive + // `unusedCount` negative. + const invokedInInventory = new Set() + for (const fqn of acc.invokedTools) { + if (acc.inventory.has(fqn)) invokedInInventory.add(fqn) + } + const unusedTools = Array.from(acc.inventory).filter(t => !invokedInInventory.has(t)).sort() + const toolsInvoked = acc.inventory.size - unusedTools.length + result.push({ + server, + toolsAvailable: acc.inventory.size, + toolsInvoked, + unusedTools, + invocations: acc.invocations, + loadedSessions: acc.loadedSessions, + coverageRatio: acc.inventory.size === 0 ? 
0 : toolsInvoked / acc.inventory.size, + }) + } + result.sort((a, b) => b.toolsAvailable - a.toolsAvailable) + return result +} + +/** + * Cache-aware token cost estimate for the unused-tool overhead of one or + * more servers, summed across all sessions that loaded any of them. + * + * Returns three buckets: + * - `cacheWriteTokens`: schema bytes paid at full input price (each + * cache-creation event in a session that loaded one of the servers). + * - `cacheReadTokens`: schema bytes carried at the cache-read discount on + * subsequent turns (ongoing overhead). + * - `effectiveInputTokens`: equivalent fresh-input tokens, weighted by + * cache pricing. Used to estimate dollar cost downstream by multiplying + * by the project's input rate. + * + * We cap each call's contribution at the observed cache-creation / + * cache-read totals for that call: it is not meaningful to claim more MCP + * overhead than the call's own cache bucket could possibly contain. The + * cap is applied once across the combined unused-schema budget for all + * flagged servers, not per server, so two flagged servers cannot both + * independently claim the same call's cache bucket. + * + * Anthropic caches expire after roughly 5 minutes of inactivity, so a long + * session can rebuild the cache multiple times. Every call that reports + * `cacheCreationInputTokens > 0` is treated as another rebuild, not just + * the very first one. + * + * "Loaded" is defined exclusively by observed inventory: a session that + * invoked a server without ever emitting a `deferred_tools_delta` for it + * does not count, matching the invariant `aggregateMcpCoverage` uses for + * `loadedSessions`. + */ +export function estimateMcpSchemaCost( + unusedToolCounts: Record | number, + projects: ProjectSummary[], + serverOrServers: string | string[], +): { cacheWriteTokens: number; cacheReadTokens: number; effectiveInputTokens: number } { + // Backward-compatible single-server signature used by tests. 
+ const servers = Array.isArray(serverOrServers) ? serverOrServers : [serverOrServers] + const counts: Record = typeof unusedToolCounts === 'number' + ? { [serverOrServers as string]: unusedToolCounts } + : unusedToolCounts + + const totalUnusedSchemaTokens = servers.reduce( + (s, srv) => s + (counts[srv] ?? 0) * TOKENS_PER_MCP_TOOL, + 0, + ) + if (totalUnusedSchemaTokens === 0) { + return { cacheWriteTokens: 0, cacheReadTokens: 0, effectiveInputTokens: 0 } + } + + const serverSet = new Set(servers) + let cacheWriteTokens = 0 + let cacheReadTokens = 0 + + for (const project of projects) { + for (const session of project.sessions) { + // A session counts only if its observed inventory included at least + // one of the flagged servers — same invariant `aggregateMcpCoverage` + // uses for `loadedSessions`. + let loaded = false + for (const fqn of session.mcpInventory ?? []) { + const seg = fqn.split('__')[1] + if (seg && serverSet.has(seg)) { loaded = true; break } + } + if (!loaded) continue + + for (const turn of session.turns) { + for (const call of turn.assistantCalls) { + // Both buckets can be non-zero on the same call (cache rebuild + // alongside a partial read), so account for them independently. + // The cap is applied to the combined unused-schema budget so + // multiple flagged servers cannot all claim the same call. + if (call.usage.cacheCreationInputTokens > 0) { + cacheWriteTokens += Math.min(totalUnusedSchemaTokens, call.usage.cacheCreationInputTokens) + } + if (call.usage.cacheReadInputTokens > 0) { + cacheReadTokens += Math.min(totalUnusedSchemaTokens, call.usage.cacheReadInputTokens) + } + } + } + } + } + + const effectiveInputTokens = cacheWriteTokens + cacheReadTokens * CACHE_READ_DISCOUNT + return { cacheWriteTokens, cacheReadTokens, effectiveInputTokens } +} + +/** + * Find MCP servers whose tool inventory is largely unused. Replaces the + * older server-only `detectUnusedMcp` (which only flagged servers with + * literal zero invocations). 
+ * + * A server is flagged when, taken together: + * - it exposed more than `MCP_COVERAGE_MIN_TOOLS` tools, + * - we saw it loaded in at least `MCP_COVERAGE_MIN_SESSIONS` sessions, + * - the coverage ratio is below `MCP_COVERAGE_LOW_THRESHOLD`. + * + * Token-savings estimates use the cache-aware accounting from + * `estimateMcpSchemaCost` so we don't mistake cached-prefix carry-over for + * fresh-input billing. + */ +export function detectMcpToolCoverage( + projects: ProjectSummary[], +): WasteFinding | null { + const coverage = aggregateMcpCoverage(projects) + if (coverage.length === 0) return null + + const flagged = coverage.filter(c => + c.toolsAvailable > MCP_COVERAGE_MIN_TOOLS + && c.loadedSessions >= MCP_COVERAGE_MIN_SESSIONS + && c.coverageRatio < MCP_COVERAGE_LOW_THRESHOLD, + ) + if (flagged.length === 0) return null + + flagged.sort((a, b) => (b.toolsAvailable - b.toolsInvoked) - (a.toolsAvailable - a.toolsInvoked)) + + const lines: string[] = [] + const removeCommands: string[] = [] + const unusedCountsByServer: Record = {} + const flaggedServers: string[] = [] + + for (const c of flagged) { + unusedCountsByServer[c.server] = c.toolsAvailable - c.toolsInvoked + flaggedServers.push(c.server) + const pct = Math.round(c.coverageRatio * 100) + lines.push( + `${c.server}: ${c.toolsInvoked}/${c.toolsAvailable} tools used (${pct}% coverage) across ${c.loadedSessions} session${c.loadedSessions === 1 ? '' : 's'}`, + ) + removeCommands.push(`claude mcp remove ${c.server}`) + } + + // Single combined cost pass: caps each call's contribution at the + // total unused-schema budget across all flagged servers, so two + // flagged servers cannot independently claim the same call's cache + // bucket and overstate `tokensSaved`. + const cost = estimateMcpSchemaCost(unusedCountsByServer, projects, flaggedServers) + const tokensSaved = Math.round(cost.effectiveInputTokens) + const impact: Impact = tokensSaved >= MCP_COVERAGE_HIGH_IMPACT_TOKENS + ? 
'high' + : flagged.length >= UNUSED_MCP_HIGH_THRESHOLD + ? 'high' + : 'medium' + + return { + title: `${flagged.length} MCP server${flagged.length === 1 ? '' : 's'} with low tool coverage`, + explanation: + `Schema for unused tools is loaded into the system prompt every session and ` + + `carried in the cached prefix on every turn. ` + + `${lines.join('; ')}.`, + impact, + tokensSaved, + fix: { + type: 'command', + label: flagged.length === 1 + ? 'Remove the underused server, or trim its tools in your MCP config:' + : 'Remove underused servers, or trim their tools in your MCP config:', + text: removeCommands.join('\n'), + }, + } +} + export function detectUnusedMcp( calls: ToolCall[], projects: ProjectSummary[], @@ -497,10 +801,27 @@ export function detectUnusedMcp( } } + // Servers that the new coverage detector will flag fall under its + // jurisdiction (per-tool granularity, cache-aware costing) and we + // suppress them here to avoid double-flagging. Importantly, we suppress + // only the servers that actually clear the coverage detector's + // thresholds — a small, inventoried-but-uninvoked server that the + // coverage detector skips would otherwise become a blind spot. 
+ const coverageReportedServers = new Set( + aggregateMcpCoverage(projects) + .filter(c => + c.toolsAvailable > MCP_COVERAGE_MIN_TOOLS + && c.loadedSessions >= MCP_COVERAGE_MIN_SESSIONS + && c.coverageRatio < MCP_COVERAGE_LOW_THRESHOLD, + ) + .map(c => c.server), + ) + const now = Date.now() const unused: string[] = [] for (const entry of configured.values()) { if (calledServers.has(entry.normalized)) continue + if (coverageReportedServers.has(entry.normalized)) continue if (entry.mtime > 0 && now - entry.mtime < MCP_NEW_CONFIG_GRACE_MS) continue unused.push(entry.original) } @@ -973,6 +1294,7 @@ export async function scanAndDetect( () => detectJunkReads(toolCalls, dateRange), () => detectDuplicateReads(toolCalls, dateRange), () => detectUnusedMcp(toolCalls, projects, projectCwds), + () => detectMcpToolCoverage(projects), () => detectBloatedClaudeMd(projectCwds), () => detectBashBloat(), ] diff --git a/src/parser.ts b/src/parser.ts index 6af996fc..09cf99c3 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -203,10 +203,54 @@ function groupIntoTurns(entries: JournalEntry[], seenMsgIds: Set): Parse return turns } +/** + * Extract MCP tool inventory observed across a session's JSONL entries. + * + * Claude Code emits `attachment.type === "deferred_tools_delta"` entries whose + * `addedNames` array lists every tool currently available at that turn (built-in + * tools plus all `mcp__<server>__<tool>` names exposed by configured MCP + * servers). Tool inventory can change mid-session if the user reloads MCP + * config, so we union every occurrence rather than trusting only the first. + * + * Built-in tools are filtered out: only `mcp__*` identifiers survive. + */ +// Fully-qualified MCP tool name shape: `mcp__<server>__<tool>`. Both server +// and tool segments must be non-empty. Names like `mcp__server` (no tool +// segment) or `mcp__server__` (trailing empty tool) would silently pollute +// the inventory and break downstream `split('__')` consumers, so they're +// rejected here.
+function isMcpToolName(name: string): boolean { + if (!name.startsWith('mcp__')) return false + const rest = name.slice(5) // strip `mcp__` + const sep = rest.indexOf('__') + if (sep <= 0) return false // missing or empty server + if (sep >= rest.length - 2) return false // missing or empty tool + return true +} + +export function extractMcpInventory(entries: JournalEntry[]): string[] { + const inventory = new Set() + for (const entry of entries) { + const att = entry['attachment'] + if (!att || typeof att !== 'object') continue + const a = att as { type?: unknown; addedNames?: unknown } + if (a.type !== 'deferred_tools_delta') continue + if (!Array.isArray(a.addedNames)) continue + for (const name of a.addedNames) { + if (typeof name !== 'string') continue + if (!isMcpToolName(name)) continue + inventory.add(name) + } + } + if (inventory.size === 0) return [] + return Array.from(inventory).sort() +} + function buildSessionSummary( sessionId: string, project: string, turns: ClassifiedTurn[], + mcpInventory?: string[], ): SessionSummary { const modelBreakdown: SessionSummary['modelBreakdown'] = Object.create(null) const toolBreakdown: SessionSummary['toolBreakdown'] = Object.create(null) @@ -311,6 +355,7 @@ function buildSessionSummary( bashBreakdown, categoryBreakdown, skillBreakdown, + ...(mcpInventory && mcpInventory.length > 0 ? { mcpInventory } : {}), } } @@ -362,7 +407,14 @@ async function parseSessionFile( } const classified = turns.map(classifyTurn) - return buildSessionSummary(sessionId, project, classified) + // Inventory is extracted from the full entry stream, not just the + // turns we kept after date filtering: tool availability is set up + // once at the start of a session (with possible mid-session reloads), + // and we want to reflect what was loaded even if the user only ran + // turns inside a narrow date window. 
+ const mcpInventory = extractMcpInventory(entries) + + return buildSessionSummary(sessionId, project, classified, mcpInventory) } async function collectJsonlFiles(dirPath: string): Promise { diff --git a/src/types.ts b/src/types.ts index ab675154..e5562e8f 100644 --- a/src/types.ts +++ b/src/types.ts @@ -121,6 +121,12 @@ export type SessionSummary = { bashBreakdown: Record categoryBreakdown: Record skillBreakdown: Record + // Observed MCP tools available in this session, captured from + // `attachment.deferred_tools_delta.addedNames` entries. Union across all + // turns. Each name is a fully-qualified `mcp__<server>__<tool>` identifier. + // Built-in tools (Bash, Edit, etc.) are filtered out. Provider-agnostic field; + // currently populated only by the Claude parser. + mcpInventory?: string[] } export type ProjectSummary = { diff --git a/tests/mcp-coverage.test.ts b/tests/mcp-coverage.test.ts new file mode 100644 index 00000000..1d078d2d --- /dev/null +++ b/tests/mcp-coverage.test.ts @@ -0,0 +1,450 @@ +import { describe, it, expect } from 'vitest' + +import { + aggregateMcpCoverage, + detectMcpToolCoverage, + estimateMcpSchemaCost, +} from '../src/optimize.js' +import type { + ClassifiedTurn, + ParsedApiCall, + ProjectSummary, + SessionSummary, + TaskCategory, + TokenUsage, +} from '../src/types.js' + +// --------------------------------------------------------------------------- +// Test fixtures +// --------------------------------------------------------------------------- + +const ZERO_USAGE: TokenUsage = { + inputTokens: 0, + outputTokens: 0, + cacheCreationInputTokens: 0, + cacheReadInputTokens: 0, + cachedInputTokens: 0, + reasoningTokens: 0, + webSearchRequests: 0, +} + +function makeCall(opts: { + tools?: string[] + cacheCreation?: number + cacheRead?: number + cost?: number +} = {}): ParsedApiCall { + const tools = opts.tools ?? [] + return { + provider: 'claude', + model: 'Opus 4.7', + usage: { + ...ZERO_USAGE, + cacheCreationInputTokens: opts.cacheCreation ??
0, + cacheReadInputTokens: opts.cacheRead ?? 0, + }, + costUSD: opts.cost ?? 0, + tools, + mcpTools: tools.filter(t => t.startsWith('mcp__')), + skills: [], + hasAgentSpawn: false, + hasPlanMode: false, + speed: 'standard', + timestamp: '2026-05-04T00:00:00Z', + bashCommands: [], + deduplicationKey: 'k', + } +} + +function makeTurn(calls: ParsedApiCall[]): ClassifiedTurn { + return { + userMessage: '', + assistantCalls: calls, + timestamp: '2026-05-04T00:00:00Z', + sessionId: 's1', + category: 'coding', + retries: 0, + hasEdits: false, + } +} + +function makeSession(opts: { + sessionId?: string + inventory?: string[] + turns?: ClassifiedTurn[] + mcpBreakdown?: Record +}): SessionSummary { + const turns = opts.turns ?? [] + const apiCalls = turns.reduce((s, t) => s + t.assistantCalls.length, 0) + const emptyCategoryBreakdown = {} as Record + return { + sessionId: opts.sessionId ?? 's1', + project: 'p', + firstTimestamp: '2026-05-04T00:00:00Z', + lastTimestamp: '2026-05-04T00:00:00Z', + totalCostUSD: 0, + totalInputTokens: 0, + totalOutputTokens: 0, + totalCacheReadTokens: 0, + totalCacheWriteTokens: 0, + apiCalls, + turns, + modelBreakdown: {}, + toolBreakdown: {}, + mcpBreakdown: opts.mcpBreakdown ?? {}, + bashBreakdown: {}, + categoryBreakdown: emptyCategoryBreakdown, + skillBreakdown: {}, + ...(opts.inventory ? 
{ mcpInventory: opts.inventory } : {}), + } +} + +function project(sessions: SessionSummary[]): ProjectSummary { + return { + project: 'p', + projectPath: '/tmp/p', + sessions, + totalCostUSD: 0, + totalApiCalls: sessions.reduce((s, ses) => s + ses.apiCalls, 0), + } +} + +// --------------------------------------------------------------------------- +// aggregateMcpCoverage +// --------------------------------------------------------------------------- + +describe('aggregateMcpCoverage', () => { + it('returns empty list when no session has MCP inventory', () => { + const projects = [project([makeSession({})])] + expect(aggregateMcpCoverage(projects)).toEqual([]) + }) + + it('reports per-server tools available, invoked, and unused', () => { + const inventory = [ + 'mcp__hf__hub_repo_search', + 'mcp__hf__paper_search', + 'mcp__hf__hf_doc_search', + ] + const turns = [ + makeTurn([makeCall({ tools: ['mcp__hf__hub_repo_search'] })]), + ] + const sessions = [ + makeSession({ inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }), + ] + const result = aggregateMcpCoverage([project(sessions)]) + + expect(result).toHaveLength(1) + expect(result[0]!.server).toBe('hf') + expect(result[0]!.toolsAvailable).toBe(3) + expect(result[0]!.toolsInvoked).toBe(1) + expect(result[0]!.unusedTools).toEqual([ + 'mcp__hf__hf_doc_search', + 'mcp__hf__paper_search', + ]) + expect(result[0]!.coverageRatio).toBeCloseTo(1 / 3, 5) + expect(result[0]!.invocations).toBe(1) + expect(result[0]!.loadedSessions).toBe(1) + }) + + it('unions inventory across multiple sessions for the same server', () => { + const sessions = [ + makeSession({ sessionId: 'a', inventory: ['mcp__x__a', 'mcp__x__b'] }), + makeSession({ sessionId: 'b', inventory: ['mcp__x__b', 'mcp__x__c'] }), + ] + const result = aggregateMcpCoverage([project(sessions)]) + expect(result[0]!.toolsAvailable).toBe(3) + expect(result[0]!.loadedSessions).toBe(2) + }) + + it('separates servers with similar names', () => { + const sessions = [ + 
+ makeSession({ inventory: ['mcp__hf__a', 'mcp__hugface__a'] }), + ] + const result = aggregateMcpCoverage([project(sessions)]) + expect(result.map(r => r.server).sort()).toEqual(['hf', 'hugface']) + }) + + it('skips invocations without inventory (foreign server, no inventory observed)', () => { + // A server can show up only via a call. With no observed inventory + // there is no baseline to measure coverage against, so it is not a + // candidate for the unused-coverage finding. + const turns = [makeTurn([makeCall({ tools: ['mcp__ghost__t1'] })])] + const sessions = [ + makeSession({ turns, mcpBreakdown: { ghost: { calls: 1 } } }), + ] + const result = aggregateMcpCoverage([project(sessions)]) + // No inventory entry -> aggregator drops the server from the report + // because we cannot reason about coverage without an inventory baseline. + expect(result).toEqual([]) + }) +}) + +// --------------------------------------------------------------------------- +// estimateMcpSchemaCost — cache-aware accounting +// --------------------------------------------------------------------------- + +describe('estimateMcpSchemaCost', () => { + it('charges first cacheCreation turn at full price, subsequent turns at cache-read', () => { + const turns = [ + makeTurn([makeCall({ cacheCreation: 50_000 })]), // first turn: write + makeTurn([makeCall({ cacheRead: 60_000 })]), // ongoing: read + makeTurn([makeCall({ cacheRead: 60_000 })]), + ] + const sessions = [makeSession({ + inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`), + turns, + mcpBreakdown: { svc: { calls: 0 } }, + })] + // 30 unused tools * 400 token estimate = 12_000 schema tokens + // cap by call cache buckets so we never overclaim + const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(12_000) // capped by 50k creation, 12k schema fits + expect(cost.cacheReadTokens).toBe(24_000) // 12k + 12k across two ongoing turns + // effective = write + read * 0.10
(cache discount) + expect(cost.effectiveInputTokens).toBeCloseTo(12_000 + 24_000 * 0.10, 5) + }) + + it('caps by available cache bucket so we never overclaim', () => { + const turns = [makeTurn([makeCall({ cacheCreation: 1_000 })])] + const sessions = [makeSession({ + inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`), + turns, + mcpBreakdown: { svc: { calls: 0 } }, + })] + // 30*400 = 12k schema tokens, but the call only had 1k cache-creation, + // so we should not claim more than 1k of overhead for that turn. + const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(1_000) + }) + + it('returns zero when no unused tools', () => { + const sessions = [makeSession({ + inventory: ['mcp__svc__t1'], + turns: [makeTurn([makeCall({ cacheCreation: 5000 })])], + })] + const cost = estimateMcpSchemaCost(0, [project(sessions)], 'svc') + expect(cost).toEqual({ cacheWriteTokens: 0, cacheReadTokens: 0, effectiveInputTokens: 0 }) + }) + + it('counts cache write AND cache read on the same call', () => { + // A long session can have a cache rebuild mid-stream where one call + // reports both buckets. The estimator must charge both, not skip the + // read because of the write. + const turns = [makeTurn([ + makeCall({ cacheCreation: 50_000, cacheRead: 30_000 }), + ])] + const sessions = [makeSession({ + inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`), + turns, + mcpBreakdown: { svc: { calls: 0 } }, + })] + const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(12_000) // capped at 50k creation + expect(cost.cacheReadTokens).toBe(12_000) // capped at 30k read + }) + + it('counts every cache rebuild, not just the first one', () => { + // Sessions that span more than 5 minutes can rebuild the cache + // multiple times. The estimator should treat every cacheCreation + // bucket as another write. 
+ const turns = [makeTurn([ + makeCall({ cacheCreation: 50_000 }), + makeCall({ cacheCreation: 50_000 }), // rebuild after cache TTL + makeCall({ cacheRead: 60_000 }), + ])] + const sessions = [makeSession({ + inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`), + turns, + mcpBreakdown: { svc: { calls: 0 } }, + })] + const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(24_000) // both rebuilds counted + expect(cost.cacheReadTokens).toBe(12_000) + }) + + it('skips sessions where the server was never loaded', () => { + const turns = [makeTurn([makeCall({ cacheCreation: 100_000 })])] + const sessions = [makeSession({ + inventory: ['mcp__other__t1'], + turns, + })] + const cost = estimateMcpSchemaCost(10, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(0) + }) + + it('requires observed inventory for the server, not just invocations', () => { + // Session invoked the server (mcpBreakdown set, mcpTools called) but + // never reported a deferred_tools_delta for it. Cost should be 0 to + // stay consistent with aggregateMcpCoverage's loadedSessions rule. + const turns = [makeTurn([ + makeCall({ tools: ['mcp__svc__t1'], cacheCreation: 100_000 }), + ])] + const sessions = [makeSession({ + // No inventory at all + turns, + mcpBreakdown: { svc: { calls: 1 } }, + })] + const cost = estimateMcpSchemaCost(10, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(0) + expect(cost.cacheReadTokens).toBe(0) + }) + + it('caps combined unused-schema budget across multiple flagged servers', () => { + // Two flagged servers, each with 30 unused tools (12k schema each = + // 24k combined). One call has a 50k cache-creation bucket. The + // combined cap means total write tokens reported is min(24k, 50k) = + // 24k, not 24k + 24k = 48k. 
+ const inventory = [ + ...Array.from({ length: 30 }, (_, i) => `mcp__a__t${i}`), + ...Array.from({ length: 30 }, (_, i) => `mcp__b__t${i}`), + ] + const turns = [makeTurn([makeCall({ cacheCreation: 50_000 })])] + const sessions = [makeSession({ inventory, turns })] + const cost = estimateMcpSchemaCost( + { a: 30, b: 30 }, + [project(sessions)], + ['a', 'b'], + ) + expect(cost.cacheWriteTokens).toBe(24_000) + }) + + it('still works with the single-server signature (backward compat)', () => { + const turns = [makeTurn([makeCall({ cacheCreation: 50_000 })])] + const sessions = [makeSession({ + inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`), + turns, + })] + const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(12_000) + }) +}) + +// --------------------------------------------------------------------------- +// detectMcpToolCoverage — finding emission with thresholds +// --------------------------------------------------------------------------- + +describe('detectMcpToolCoverage', () => { + it('returns null when no inventory exists at all', () => { + expect(detectMcpToolCoverage([project([makeSession({})])])).toBeNull() + }) + + it('does not flag a server with healthy coverage', () => { + const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`) + const turns = [makeTurn( + Array.from({ length: 8 }, (_, i) => makeCall({ tools: [`mcp__svc__t${i}`] })), + )] + const sessions = [ + makeSession({ sessionId: 'a', inventory, turns }), + makeSession({ sessionId: 'b', inventory, turns }), + ] + // 8/20 = 40% coverage, above the 20% threshold -> no finding + expect(detectMcpToolCoverage([project(sessions)])).toBeNull() + }) + + it('does not flag a server with too few tools (signal too noisy)', () => { + // Below MCP_COVERAGE_MIN_TOOLS=10 + const inventory = ['mcp__svc__a', 'mcp__svc__b'] + const sessions = [ + makeSession({ sessionId: 'a', inventory }), + makeSession({ sessionId: 'b', 
inventory }), + ] + expect(detectMcpToolCoverage([project(sessions)])).toBeNull() + }) + + it('does not flag if seen in only one session (insufficient evidence)', () => { + const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`) + const sessions = [makeSession({ inventory })] + expect(detectMcpToolCoverage([project(sessions)])).toBeNull() + }) + + it('flags a large server with low coverage across multiple sessions', () => { + const inventory = Array.from({ length: 30 }, (_, i) => `mcp__hf__t${i}`) + const turns = [makeTurn([ + makeCall({ tools: ['mcp__hf__t0'], cacheCreation: 100_000 }), + ])] + const sessions = [ + makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }), + makeSession({ sessionId: 'b', inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }), + ] + const finding = detectMcpToolCoverage([project(sessions)]) + expect(finding).not.toBeNull() + expect(finding!.title).toContain('1 MCP server') + expect(finding!.title).toContain('low tool coverage') + expect(finding!.explanation).toContain('hf') + expect(finding!.explanation).toContain('1/30') + expect(finding!.fix.type).toBe('command') + expect((finding!.fix as { text: string }).text).toContain('claude mcp remove hf') + expect(finding!.tokensSaved).toBeGreaterThan(0) + }) + + it('escalates impact to high when token waste crosses the threshold', () => { + const inventory = Array.from({ length: 60 }, (_, i) => `mcp__big__t${i}`) + // 60 tools * 400 tokens = 24k schema. With many sessions and large + // cache-creation buckets, total effective tokens easily clear 200k. + const turns = [makeTurn([ + makeCall({ tools: ['mcp__big__t0'], cacheCreation: 50_000 }), + makeCall({ cacheRead: 60_000 }), + makeCall({ cacheRead: 60_000 }), + ])] + // Need enough sessions so the per-session ~28.8k effective tokens + // (24k write + 48k read × 0.10) sum past the 200k high-impact threshold. 
+ const sessions = Array.from({ length: 8 }, (_, i) => + makeSession({ sessionId: `s${i}`, inventory, turns, mcpBreakdown: { big: { calls: 1 } } }), + ) + const finding = detectMcpToolCoverage([project(sessions)]) + expect(finding).not.toBeNull() + expect(finding!.impact).toBe('high') + }) + + it('does not count invocation-only sessions toward loadedSessions', () => { + // Server `svc` has inventory in only one session, but is invoked in + // a second session that never observed the schema. Pre-fix this + // would have satisfied the >=2 session threshold; it must not now. + const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`) + const turns = [makeTurn([ + makeCall({ tools: ['mcp__svc__t0'], cacheCreation: 50_000 }), + ])] + const sessions = [ + makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }), + // No inventory — this shouldn't be considered a "loaded" session. + makeSession({ sessionId: 'b', turns, mcpBreakdown: { svc: { calls: 1 } } }), + ] + expect(detectMcpToolCoverage([project(sessions)])).toBeNull() + }) + + it('does not let invocations of un-inventoried tools inflate coverage', () => { + // Inventory has 20 tools, none invoked. Calls hit a 21st tool that + // never appeared in any deferred_tools_delta (could be a renamed/ + // removed tool from an older session config). Coverage must stay 0% + // and unusedCount must not go negative. 
+ const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`) + const turns = [makeTurn([makeCall({ tools: ['mcp__svc__ghost'] })])] + const sessions = [ + makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }), + makeSession({ sessionId: 'b', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }), + ] + const result = aggregateMcpCoverage([project(sessions)]) + expect(result[0]!.toolsAvailable).toBe(20) + expect(result[0]!.toolsInvoked).toBe(0) + expect(result[0]!.coverageRatio).toBe(0) + expect(result[0]!.unusedTools).toHaveLength(20) + }) + + it('handles multiple flagged servers and pluralises the title', () => { + const sessions: SessionSummary[] = [] + for (const server of ['svc1', 'svc2']) { + const inventory = Array.from({ length: 20 }, (_, i) => `mcp__${server}__t${i}`) + const turns = [makeTurn([ + makeCall({ tools: [`mcp__${server}__t0`], cacheCreation: 50_000 }), + ])] + sessions.push( + makeSession({ sessionId: `${server}-a`, inventory, turns, mcpBreakdown: { [server]: { calls: 1 } } }), + makeSession({ sessionId: `${server}-b`, inventory, turns, mcpBreakdown: { [server]: { calls: 1 } } }), + ) + } + const finding = detectMcpToolCoverage([project(sessions)]) + expect(finding).not.toBeNull() + expect(finding!.title).toContain('2 MCP servers') + expect((finding!.fix as { text: string }).text.split('\n')).toHaveLength(2) + }) +}) diff --git a/tests/parser-mcp-inventory.test.ts b/tests/parser-mcp-inventory.test.ts new file mode 100644 index 00000000..cbbe34ce --- /dev/null +++ b/tests/parser-mcp-inventory.test.ts @@ -0,0 +1,126 @@ +import { describe, it, expect } from 'vitest' + +import { extractMcpInventory } from '../src/parser.js' +import type { JournalEntry } from '../src/types.js' + +function entry(overrides: Partial<JournalEntry> & Record<string, unknown>): JournalEntry { + return { type: 'attachment', ...overrides } as JournalEntry +} + +describe('extractMcpInventory', () => { + it('returns empty array when no entries have an
attachment', () => { + expect(extractMcpInventory([entry({ type: 'user' })])).toEqual([]) + }) + + it('returns empty array when no deferred_tools_delta is present', () => { + expect(extractMcpInventory([ + entry({ attachment: { type: 'something_else', addedNames: ['mcp__a__b'] } }), + ])).toEqual([]) + }) + + it('extracts mcp__server__tool names from a single delta', () => { + const result = extractMcpInventory([ + entry({ + attachment: { + type: 'deferred_tools_delta', + addedNames: ['Bash', 'Edit', 'mcp__hf__hub_repo_search', 'mcp__hf__paper_search'], + }, + }), + ]) + expect(result).toEqual(['mcp__hf__hub_repo_search', 'mcp__hf__paper_search']) + }) + + it('filters out built-in tools (no mcp__ prefix)', () => { + const result = extractMcpInventory([ + entry({ + attachment: { + type: 'deferred_tools_delta', + addedNames: ['Bash', 'Edit', 'WebFetch', 'mcp__svc__t1'], + }, + }), + ]) + expect(result).toEqual(['mcp__svc__t1']) + }) + + it('rejects malformed names: empty server segment', () => { + const result = extractMcpInventory([ + entry({ + attachment: { + type: 'deferred_tools_delta', + addedNames: ['mcp____tool', 'mcp__svc__t1'], + }, + }), + ]) + expect(result).toEqual(['mcp__svc__t1']) + }) + + it('rejects malformed names: missing tool segment (no second `__`)', () => { + const result = extractMcpInventory([ + entry({ + attachment: { + type: 'deferred_tools_delta', + addedNames: ['mcp__server', 'mcp__svc__t1'], + }, + }), + ]) + expect(result).toEqual(['mcp__svc__t1']) + }) + + it('rejects malformed names: empty tool segment (trailing `__`)', () => { + const result = extractMcpInventory([ + entry({ + attachment: { + type: 'deferred_tools_delta', + addedNames: ['mcp__server__', 'mcp__svc__t1'], + }, + }), + ]) + expect(result).toEqual(['mcp__svc__t1']) + }) + + it('unions across multiple delta entries (incremental adds)', () => { + const result = extractMcpInventory([ + entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__a__t1'] } }), + 
entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__a__t2', 'mcp__b__t1'] } }), + ]) + expect(result).toEqual(['mcp__a__t1', 'mcp__a__t2', 'mcp__b__t1']) + }) + + it('deduplicates names seen in multiple deltas', () => { + const result = extractMcpInventory([ + entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__a__t1', 'mcp__a__t1'] } }), + entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__a__t1'] } }), + ]) + expect(result).toEqual(['mcp__a__t1']) + }) + + it('tolerates missing or non-string addedNames', () => { + const result = extractMcpInventory([ + entry({ attachment: { type: 'deferred_tools_delta' } }), + entry({ attachment: { type: 'deferred_tools_delta', addedNames: 'not-an-array' } }), + entry({ attachment: { type: 'deferred_tools_delta', addedNames: [42, null, 'mcp__svc__t1', undefined] } }), + ]) + expect(result).toEqual(['mcp__svc__t1']) + }) + + it('tolerates malformed attachment object', () => { + const result = extractMcpInventory([ + entry({ attachment: null }), + entry({ attachment: 'string-not-object' }), + entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__svc__t1'] } }), + ]) + expect(result).toEqual(['mcp__svc__t1']) + }) + + it('returns names in sorted order', () => { + const result = extractMcpInventory([ + entry({ + attachment: { + type: 'deferred_tools_delta', + addedNames: ['mcp__zzz__a', 'mcp__aaa__z', 'mcp__mmm__m'], + }, + }), + ]) + expect(result).toEqual(['mcp__aaa__z', 'mcp__mmm__m', 'mcp__zzz__a']) + }) +}) From e46b20b9272537f1df42842a49a8a4148c7a7368 Mon Sep 17 00:00:00 2001 From: ozymandiashh <234437643+ozymandiashh@users.noreply.github.com> Date: Tue, 5 May 2026 05:11:00 +0300 Subject: [PATCH 2/3] fix(optimize): reuse mcp coverage and type schema estimator --- src/optimize.ts | 48 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/src/optimize.ts b/src/optimize.ts index 7882660e..04d95c55 
100644 --- a/src/optimize.ts +++ b/src/optimize.ts @@ -507,6 +507,12 @@ export type McpServerCoverage = { coverageRatio: number } +type McpSchemaCostEstimate = { + cacheWriteTokens: number + cacheReadTokens: number + effectiveInputTokens: number +} + /** * Aggregate MCP inventory and invocations across the projects in scope. * @@ -651,16 +657,36 @@ export function aggregateMcpCoverage(projects: ProjectSummary[]): McpServerCover * does not count, matching the invariant `aggregateMcpCoverage` uses for * `loadedSessions`. */ +export function estimateMcpSchemaCost( + unusedToolCount: number, + projects: ProjectSummary[], + server: string, +): McpSchemaCostEstimate +export function estimateMcpSchemaCost( + unusedToolCountsByServer: Record<string, number>, + projects: ProjectSummary[], + servers: string[], +): McpSchemaCostEstimate export function estimateMcpSchemaCost( unusedToolCounts: Record<string, number> | number, projects: ProjectSummary[], serverOrServers: string | string[], -): { cacheWriteTokens: number; cacheReadTokens: number; effectiveInputTokens: number } { - // Backward-compatible single-server signature used by tests. - const servers = Array.isArray(serverOrServers) ? serverOrServers : [serverOrServers] - const counts: Record<string, number> = typeof unusedToolCounts === 'number' - ? { [serverOrServers as string]: unusedToolCounts } - : unusedToolCounts +): McpSchemaCostEstimate { + let servers: string[] + let counts: Record<string, number> + if (typeof unusedToolCounts === 'number') { + if (typeof serverOrServers !== 'string') { + throw new TypeError('single-server MCP cost estimates require a string server name') + } + servers = [serverOrServers] + counts = { [serverOrServers]: unusedToolCounts } + } else { + if (!Array.isArray(serverOrServers)) { + throw new TypeError('multi-server MCP cost estimates require a string[] server list') + } + servers = serverOrServers + counts = unusedToolCounts + } const totalUnusedSchemaTokens = servers.reduce( (s, srv) => s + (counts[srv] ??
0) * TOKENS_PER_MCP_TOOL, @@ -723,8 +749,8 @@ export function estimateMcpSchemaCost( */ export function detectMcpToolCoverage( projects: ProjectSummary[], + coverage = aggregateMcpCoverage(projects), ): WasteFinding | null { - const coverage = aggregateMcpCoverage(projects) if (coverage.length === 0) return null const flagged = coverage.filter(c => @@ -785,6 +811,7 @@ export function detectUnusedMcp( calls: ToolCall[], projects: ProjectSummary[], projectCwds: Set<string>, + mcpCoverage = aggregateMcpCoverage(projects), ): WasteFinding | null { const configured = loadMcpConfigs(projectCwds) if (configured.size === 0) return null @@ -808,7 +835,7 @@ export function detectUnusedMcp( // thresholds — a small, inventoried-but-uninvoked server that the // coverage detector skips would otherwise become a blind spot. const coverageReportedServers = new Set( - aggregateMcpCoverage(projects) + mcpCoverage .filter(c => c.toolsAvailable > MCP_COVERAGE_MIN_TOOLS && c.loadedSessions >= MCP_COVERAGE_MIN_SESSIONS @@ -1286,6 +1313,7 @@ export async function scanAndDetect( const costRate = computeInputCostRate(projects) const { toolCalls, projectCwds, apiCalls, userMessages } = await scanSessions(dateRange) + const mcpCoverage = aggregateMcpCoverage(projects) const findings: WasteFinding[] = [] const syncDetectors: Array<() => WasteFinding | null> = [ @@ -1293,8 +1321,8 @@ export async function scanAndDetect( () => detectLowReadEditRatio(toolCalls), () => detectJunkReads(toolCalls, dateRange), () => detectDuplicateReads(toolCalls, dateRange), - () => detectUnusedMcp(toolCalls, projects, projectCwds), - () => detectMcpToolCoverage(projects), + () => detectUnusedMcp(toolCalls, projects, projectCwds, mcpCoverage), + () => detectMcpToolCoverage(projects, mcpCoverage), () => detectBloatedClaudeMd(projectCwds), () => detectBashBloat(), ] From 735f41bc6c89916e417e719e3fe58ec5386fbe95 Mon Sep 17 00:00:00 2001 From: iamtoruk Date: Mon, 4 May 2026 20:11:50 -0700 Subject: [PATCH 3/3] Fix cache-write
pricing and shell-quote server names in fix commands - Use 1.25x multiplier for cache-write tokens to match Anthropic's actual pricing (was incorrectly using 1x) - Shell-quote server names in `claude mcp remove` fix text to prevent issues with unusual server names --- src/optimize.ts | 13 +++++++------ tests/mcp-coverage.test.ts | 6 +++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/optimize.ts b/src/optimize.ts index 04d95c55..1f2a4cfb 100644 --- a/src/optimize.ts +++ b/src/optimize.ts @@ -60,10 +60,11 @@ const MCP_COVERAGE_MIN_TOOLS = 10 const MCP_COVERAGE_MIN_SESSIONS = 2 const MCP_COVERAGE_LOW_THRESHOLD = 0.20 const MCP_COVERAGE_HIGH_IMPACT_TOKENS = 200_000 -// Anthropic prices cached input reads at roughly 10% of fresh input. We use -// this to keep "ongoing" overhead estimates honest: most MCP schema bytes -// live in the cached prefix and only get charged at the discount rate after -// the first turn of a session. +// Anthropic prices cache writes at 125% of base input and cache reads at +// roughly 10% of base input. We use these to keep overhead estimates honest: +// most MCP schema bytes live in the cached prefix and only get charged at +// the discount rate after the first turn of a session. +const CACHE_WRITE_MULTIPLIER = 1.25 const CACHE_READ_DISCOUNT = 0.10 const GHOST_AGENTS_HIGH_THRESHOLD = 5 const GHOST_AGENTS_MEDIUM_THRESHOLD = 2 @@ -729,7 +730,7 @@ export function estimateMcpSchemaCost( } } - const effectiveInputTokens = cacheWriteTokens + cacheReadTokens * CACHE_READ_DISCOUNT + const effectiveInputTokens = cacheWriteTokens * CACHE_WRITE_MULTIPLIER + cacheReadTokens * CACHE_READ_DISCOUNT return { cacheWriteTokens, cacheReadTokens, effectiveInputTokens } } @@ -774,7 +775,7 @@ export function detectMcpToolCoverage( lines.push( `${c.server}: ${c.toolsInvoked}/${c.toolsAvailable} tools used (${pct}% coverage) across ${c.loadedSessions} session${c.loadedSessions === 1 ? 
'' : 's'}`, ) - removeCommands.push(`claude mcp remove ${c.server}`) + removeCommands.push(`claude mcp remove '${c.server}'`) } // Single combined cost pass: caps each call's contribution at the diff --git a/tests/mcp-coverage.test.ts b/tests/mcp-coverage.test.ts index 1d078d2d..c2a45950 100644 --- a/tests/mcp-coverage.test.ts +++ b/tests/mcp-coverage.test.ts @@ -200,8 +200,8 @@ describe('estimateMcpSchemaCost', () => { const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc') expect(cost.cacheWriteTokens).toBe(12_000) // capped by 50k creation, 12k schema fits expect(cost.cacheReadTokens).toBe(24_000) // 12k + 12k across two ongoing turns - // effective = write + read * 0.10 (cache discount) - expect(cost.effectiveInputTokens).toBeCloseTo(12_000 + 24_000 * 0.10, 5) + // effective = write * 1.25 + read * 0.10 (cache pricing) + expect(cost.effectiveInputTokens).toBeCloseTo(12_000 * 1.25 + 24_000 * 0.10, 5) }) it('caps by available cache bucket so we never overclaim', () => { @@ -373,7 +373,7 @@ describe('detectMcpToolCoverage', () => { expect(finding!.explanation).toContain('hf') expect(finding!.explanation).toContain('1/30') expect(finding!.fix.type).toBe('command') - expect((finding!.fix as { text: string }).text).toContain('claude mcp remove hf') + expect((finding!.fix as { text: string }).text).toContain("claude mcp remove 'hf'") expect(finding!.tokensSaved).toBeGreaterThan(0) })