From b4d458ab882aa46a8bded4206c18f54cbfc37354 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Fri, 14 Nov 2025 00:36:50 +0000
Subject: [PATCH] Add tool result attachments to assistant content

Co-authored-by: jck411 <jck411@gmail.com>
---
 src/backend/chat/streaming/handler.py | 62 +++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/src/backend/chat/streaming/handler.py b/src/backend/chat/streaming/handler.py
index 9c1283d..d26d558 100644
--- a/src/backend/chat/streaming/handler.py
+++ b/src/backend/chat/streaming/handler.py
@@ -230,6 +230,7 @@ async def stream_conversation(
             generation_id: str | None = None
             reasoning_segments: list[dict[str, Any]] = []
             seen_reasoning: set[tuple[str, str]] = set()
+            tool_result_attachment_ids: list[str] = []  # Tool-result attachment IDs. NOTE(review): if this runs every hop, IDs from the previous hop's tool results are cleared before injection - confirm
             try:
                 async for event in self._client.stream_chat_raw(payload):
                     data = event.get("data")
@@ -522,6 +523,61 @@ async def stream_conversation(
             )
             new_attachment_ids = list(content_builder.created_attachment_ids)
 
+            # When the assistant ends its turn (finish_reason "stop", no tool calls) and tool results
+            # produced attachments, append them as image parts so they appear in the chat
+            if (
+                finish_reason == "stop"
+                and not tool_calls
+                and tool_result_attachment_ids
+                and assistant_content is not None
+            ):
+                # Collect attachment IDs already referenced by image parts in the content
+                existing_attachment_ids: set[str] = set()
+                if isinstance(assistant_content, list):
+                    for fragment in assistant_content:
+                        if (
+                            isinstance(fragment, dict)
+                            and fragment.get("type") == "image_url"
+                        ):
+                            metadata = fragment.get("metadata")
+                            if isinstance(metadata, dict):
+                                att_id = metadata.get("attachment_id")
+                                if isinstance(att_id, str):
+                                    existing_attachment_ids.add(att_id)
+
+                # Missing IDs in first-seen order; dict.fromkeys drops repeated IDs
+                missing_attachment_ids = [
+                    aid for aid in dict.fromkeys(tool_result_attachment_ids)
+                    if aid not in existing_attachment_ids
+                ]
+
+                if missing_attachment_ids:
+                    # Normalise content to a list of parts; content_parts is already declared further down
+                    if isinstance(assistant_content, str):
+                        merged_parts: list[dict[str, Any]] = [
+                            {"type": "text", "text": assistant_content}
+                        ]
+                    else:
+                        merged_parts = list(assistant_content) if isinstance(assistant_content, list) else []
+
+                    # Append one image part per missing attachment
+                    for attachment_id in missing_attachment_ids:
+                        merged_parts.append(
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": ""
+                                },  # Will be filled by attachment_urls service
+                                "metadata": {"attachment_id": attachment_id},
+                            }
+                        )
+
+                    assistant_content = merged_parts
+                    # Register these attachments as created by this assistant turn
+                    for attachment_id in missing_attachment_ids:
+                        if attachment_id not in new_attachment_ids:
+                            new_attachment_ids.append(attachment_id)
+
             assistant_turn = AssistantTurn(
                 content=assistant_content if assistant_content else None,
                 tool_calls=tool_calls,
@@ -600,6 +656,9 @@ async def stream_conversation(
             }
             routing_headers = None
 
+            # Reset tool result attachments for next turn
+            tool_result_attachment_ids = []
+            
             if not assistant_turn.tool_calls:
                 break
 
@@ -761,6 +820,9 @@ async def stream_conversation(
                 cleaned_text, attachment_ids = _parse_attachment_references(result_text)
 
                 if attachment_ids:
+                    # Track these attachment IDs for potential injection into assistant message
+                    tool_result_attachment_ids.extend(attachment_ids)
+                    
                     # Convert to multimodal content with image references
                     content_parts: list[dict[str, Any]] = []