diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index c99110a4b3c1..51cb5f40da61 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -2812,7 +2812,8 @@ export async function runEmbeddedAttempt( try { // Idempotent cleanup for legacy sessions with persisted image payloads. - // Called each run; only mutates already-answered user turns that still carry image blocks. + // Only mutates user turns older than a few assistant replies so recent + // history stays byte-identical for prompt-cache prefix matching. const didPruneImages = pruneProcessedHistoryImages(activeSession.messages); if (didPruneImages) { activeSession.agent.replaceMessages(activeSession.messages); diff --git a/src/agents/pi-embedded-runner/run/history-image-prune.test.ts b/src/agents/pi-embedded-runner/run/history-image-prune.test.ts index 03e532eda2e4..f5045d7f8b7c 100644 --- a/src/agents/pi-embedded-runner/run/history-image-prune.test.ts +++ b/src/agents/pi-embedded-runner/run/history-image-prune.test.ts @@ -28,23 +28,44 @@ function expectPrunedImageMessage( describe("pruneProcessedHistoryImages", () => { const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" }; + const assistantTurn = () => castAgentMessage({ role: "assistant", content: "ack" }); + const userText = () => castAgentMessage({ role: "user", content: "more" }); - it("prunes image blocks from user messages that already have assistant replies", () => { + it("prunes image blocks from user messages older than 3 assistant turns", () => { const messages: AgentMessage[] = [ castAgentMessage({ role: "user", content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }], }), - castAgentMessage({ - role: "assistant", - content: "got it", - }), + assistantTurn(), + userText(), + assistantTurn(), + userText(), + assistantTurn(), ]; const content = expectPrunedImageMessage(messages, "expected user array content"); expect(content[0]?.type).toBe("text"); }); + it("keeps image blocks within the last 3 assistant turns to preserve prompt cache", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "user", + content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }], + }), + assistantTurn(), + userText(), + assistantTurn(), + ]; + + const didMutate = pruneProcessedHistoryImages(messages); + + expect(didMutate).toBe(false); + const content = expectArrayMessageContent(messages[0], "expected user array content"); + expect(content[1]).toMatchObject({ type: "image", data: "abc" }); + }); + it("does not prune latest user message when no assistant response exists yet", () => { const messages: AgentMessage[] = [ castAgentMessage({ @@ -61,20 +82,47 @@ describe("pruneProcessedHistoryImages", () => { expect(content[1]).toMatchObject({ type: "image", data: "abc" }); }); - it("prunes image blocks from toolResult messages that already have assistant replies", () => { + it("prunes image blocks from toolResult messages older than 3 assistant turns", () => { const messages: AgentMessage[] = [ castAgentMessage({ role: "toolResult", toolName: "read", content: [{ type: "text", text: "screenshot bytes" }, { ...image }], }), + assistantTurn(), + userText(), + assistantTurn(), + userText(), + assistantTurn(), + ]; + + expectPrunedImageMessage(messages, "expected toolResult array content"); + }); + + it("prunes only old images while preserving recent ones", () => { + const messages: AgentMessage[] = [ castAgentMessage({ - role: "assistant", - content: "ack", + role: "user", + content: [{ type: "text", text: "old" }, { ...image }], }), + assistantTurn(), + userText(), + assistantTurn(), + castAgentMessage({ + role: "user", + content: [{ type: "text", text: "recent" }, { ...image }], + }), + assistantTurn(), ]; - expectPrunedImageMessage(messages, "expected toolResult array content"); + const didMutate = pruneProcessedHistoryImages(messages); + expect(didMutate).toBe(true); + + const oldContent = expectArrayMessageContent(messages[0], "expected old user content"); + expect(oldContent[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER }); + + const recentContent = expectArrayMessageContent(messages[4], "expected recent user content"); + expect(recentContent[1]).toMatchObject({ type: "image", data: "abc" }); }); it("does not change messages when no assistant turn exists", () => { diff --git a/src/agents/pi-embedded-runner/run/history-image-prune.ts b/src/agents/pi-embedded-runner/run/history-image-prune.ts index 4e92bb08f01a..0138cc1ccdc6 100644 --- a/src/agents/pi-embedded-runner/run/history-image-prune.ts +++ b/src/agents/pi-embedded-runner/run/history-image-prune.ts @@ -2,24 +2,37 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]"; +/** + * Number of most-recent assistant turns whose preceding user/toolResult image blocks are + * kept intact. Pruning these would diverge the request bytes from what the provider + * cached on the previous turn, invalidating the prompt-cache prefix. + */ +const PRESERVE_RECENT_ASSISTANT_TURNS = 3; + /** * Idempotent cleanup for legacy sessions that persisted image blocks in history. - * Called each run; mutates only user turns that already have an assistant reply. + * Called each run; mutates only user turns that are older than + * {@link PRESERVE_RECENT_ASSISTANT_TURNS} assistant replies so recent turns remain + * byte-identical for prompt caching. */ export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean { - let lastAssistantIndex = -1; + let assistantSeen = 0; + let pruneBeforeIndex = -1; for (let i = messages.length - 1; i >= 0; i--) { if (messages[i]?.role === "assistant") { - lastAssistantIndex = i; - break; + assistantSeen++; + if (assistantSeen >= PRESERVE_RECENT_ASSISTANT_TURNS) { + pruneBeforeIndex = i; + break; + } } } - if (lastAssistantIndex < 0) { + if (pruneBeforeIndex < 0) { return false; } let didMutate = false; - for (let i = 0; i < lastAssistantIndex; i++) { + for (let i = 0; i < pruneBeforeIndex; i++) { const message = messages[i]; if ( !message ||