From b00d01812c1ff0761b3f8d8b40117d027d6546cc Mon Sep 17 00:00:00 2001
From: Hari Patel <patelhariv18@gmail.com>
Date: Sat, 7 Mar 2026 20:40:33 +0530
Subject: [PATCH 1/3] fix: save image to temp file and include path in prompt
 for Claude CLI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Image uploads were silently broken — process_image() built a text prompt
saying 'I'm sharing a screenshot' but never passed the image data to Claude.
The Claude CLI SDK only accepts text prompts, so the image was unreachable.

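Fix: download the image bytes to /tmp/claude_bot_files/image_<uuid>.<ext>
(falling back to a .jpg extension when the format is not recognised) and
embed the file path in the prompt so Claude CLI can read the image directly
from disk. The path is also exposed as metadata["temp_path"].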

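All four prompt builders (_create_screenshot_prompt, _create_diagram_prompt,
_create_ui_prompt, _create_generic_prompt) updated to accept and include
the image_path parameter. Their prompt text is also condensed into
single-line f-strings, which changes some wording: the UI prompt drops its
"implementation suggestions" and "potential improvements" items, the diagram
prompt drops the "(flowchart, architecture, etc.)" examples, and the generic
prompt now labels the caption "Specific request:" instead of "Context:".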

Closes #137
---
 src/bot/features/image_handler.py | 95 +++++++++++++------------------
 1 file changed, 39 insertions(+), 56 deletions(-)
diff --git a/src/bot/features/image_handler.py b/src/bot/features/image_handler.py
index f9cf3798..2f8ddc6e 100644
--- a/src/bot/features/image_handler.py
+++ b/src/bot/features/image_handler.py
@@ -10,6 +10,7 @@
 
 import base64
 from dataclasses import dataclass
+from pathlib import Path
 from typing import Dict, Optional
 
 from telegram import PhotoSize
@@ -38,36 +39,44 @@ def __init__(self, config: Settings):
     async def process_image(
         self, photo: PhotoSize, caption: Optional[str] = None
     ) -> ProcessedImage:
-        """Process uploaded image"""
+        """Process uploaded image — save to temp file and build a path-based prompt."""
+        import uuid
 
-        # Download image
+        # Download image bytes
         file = await photo.get_file()
         image_bytes = await file.download_as_bytearray()
 
-        # Detect image type
-        image_type = self._detect_image_type(image_bytes)
+        # Detect format and save to temp file so Claude CLI can read it
+        fmt = self._detect_format(bytes(image_bytes))
+        ext = fmt if fmt != "unknown" else "jpg"
+        temp_dir = Path("/tmp/claude_bot_files")
+        temp_dir.mkdir(exist_ok=True)
+        image_path = temp_dir / f"image_{uuid.uuid4()}.{ext}"
+        image_path.write_bytes(bytes(image_bytes))
 
-        # Create appropriate prompt
+        # Detect image type for prompt tailoring
+        image_type = self._detect_image_type(bytes(image_bytes))
+
+        # Build prompt with actual file path so Claude CLI can see the image
         if image_type == "screenshot":
-            prompt = self._create_screenshot_prompt(caption)
+            prompt = self._create_screenshot_prompt(caption, image_path)
         elif image_type == "diagram":
-            prompt = self._create_diagram_prompt(caption)
+            prompt = self._create_diagram_prompt(caption, image_path)
         elif image_type == "ui_mockup":
-            prompt = self._create_ui_prompt(caption)
+            prompt = self._create_ui_prompt(caption, image_path)
         else:
-            prompt = self._create_generic_prompt(caption)
+            prompt = self._create_generic_prompt(caption, image_path)
 
-        # Convert to base64 for Claude (if supported in future)
         base64_image = base64.b64encode(image_bytes).decode("utf-8")
-
         return ProcessedImage(
             prompt=prompt,
             image_type=image_type,
             base64_data=base64_image,
             size=len(image_bytes),
             metadata={
-                "format": self._detect_format(image_bytes),
+                "format": fmt,
                 "has_caption": caption is not None,
+                "temp_path": str(image_path),
             },
         )
 
@@ -93,61 +102,35 @@ def _detect_format(self, image_bytes: bytes) -> str:
         else:
             return "unknown"
 
-    def _create_screenshot_prompt(self, caption: Optional[str]) -> str:
+    def _create_screenshot_prompt(
+        self, caption: Optional[str], image_path: Path
+    ) -> str:
         """Create prompt for screenshot analysis"""
-        base_prompt = """I'm sharing a screenshot with you. Please analyze it and help me with:
-
-1. Identifying what application or website this is from
-2. Understanding the UI elements and their purpose
-3. Any issues or improvements you notice
-4. Answering any specific questions I have
-
-"""
+        base = f"I'm sharing a screenshot with you. The image is saved at: {image_path}\n\nPlease analyze it and help me with:\n1. Identifying what application or website this is from\n2. Understanding the UI elements and their purpose\n3. Any issues or improvements you notice\n4. Answering any specific questions I have\n"
         if caption:
-            base_prompt += f"Specific request: {caption}"
+            base += f"\nSpecific request: {caption}"
+        return base
 
-        return base_prompt
-
-    def _create_diagram_prompt(self, caption: Optional[str]) -> str:
+    def _create_diagram_prompt(self, caption: Optional[str], image_path: Path) -> str:
         """Create prompt for diagram analysis"""
-        base_prompt = """I'm sharing a diagram with you. Please help me:
-
-1. Understand the components and their relationships
-2. Identify the type of diagram (flowchart, architecture, etc.)
-3. Explain any technical concepts shown
-4. Suggest improvements or clarifications
-
-"""
+        base = f"I'm sharing a diagram with you. The image is saved at: {image_path}\n\nPlease help me:\n1. Understand the components and their relationships\n2. Identify the type of diagram\n3. Explain any technical concepts shown\n4. Suggest improvements or clarifications\n"
         if caption:
-            base_prompt += f"Specific request: {caption}"
+            base += f"\nSpecific request: {caption}"
+        return base
 
-        return base_prompt
-
-    def _create_ui_prompt(self, caption: Optional[str]) -> str:
+    def _create_ui_prompt(self, caption: Optional[str], image_path: Path) -> str:
         """Create prompt for UI mockup analysis"""
-        base_prompt = """I'm sharing a UI mockup with you. Please analyze:
-
-1. The layout and visual hierarchy
-2. User experience considerations
-3. Accessibility aspects
-4. Implementation suggestions
-5. Any potential improvements
-
-"""
+        base = f"I'm sharing a UI mockup with you. The image is saved at: {image_path}\n\nPlease analyze:\n1. The layout and visual hierarchy\n2. UX improvements\n3. Accessibility concerns\n"
         if caption:
-            base_prompt += f"Specific request: {caption}"
+            base += f"\nSpecific request: {caption}"
+        return base
 
-        return base_prompt
-
-    def _create_generic_prompt(self, caption: Optional[str]) -> str:
+    def _create_generic_prompt(self, caption: Optional[str], image_path: Path) -> str:
         """Create generic image analysis prompt"""
-        base_prompt = """I'm sharing an image with you. Please analyze it and provide relevant insights.
-
-"""
+        base = f"I'm sharing an image with you. The image is saved at: {image_path}\n\nPlease analyze and describe what you see.\n"
         if caption:
-            base_prompt += f"Context: {caption}"
-
-        return base_prompt
+            base += f"\nSpecific request: {caption}"
+        return base
 
     def supports_format(self, filename: str) -> bool:
         """Check if image format is supported"""

From 62f27f7ed3a4d19c0a925154f6dfecb5a2baed84 Mon Sep 17 00:00:00 2001
From: Hari Patel <patelhariv18@gmail.com>
Date: Sun, 8 Mar 2026 10:17:33 +0530
Subject: [PATCH 2/3] fix: address review feedback on image upload handler

- Move uuid import to module scope (CLAUDE.md style requirement)
- Validate image format before writing to disk; raise ValueError for unknown formats
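- Use restrictive permissions: dir 0o700, file 0o600 (the dir mode is only
  applied when mkdir creates the directory, an existing directory keeps its
  current permissions; the file is chmod'd after it has been written)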
- Assign image_bytes once, remove redundant bytes() conversions
- Explicitly instruct Claude to use Read tool for image path in all prompts
- Add finally block in agentic_photo to delete temp file after run_command
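- Retain base64_data field with comment clarifying it is for future SDK support;
  it is now optional (default "") and moved after metadata, and metadata uses
  field(default_factory=dict) instead of a None default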
---
 src/bot/features/image_handler.py | 41 +++++++++++++++++--------------
 src/bot/orchestrator.py           |  6 +++++
 2 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/src/bot/features/image_handler.py b/src/bot/features/image_handler.py
index 2f8ddc6e..94672444 100644
--- a/src/bot/features/image_handler.py
+++ b/src/bot/features/image_handler.py
@@ -9,7 +9,8 @@
 """
 
 import base64
-from dataclasses import dataclass
+import uuid
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Dict, Optional
 
@@ -17,6 +18,8 @@
 
 from src.config import Settings
 
+_TEMP_DIR = Path("/tmp/claude_bot_files")
+
 
 @dataclass
 class ProcessedImage:
@@ -24,9 +27,9 @@ class ProcessedImage:
 
     prompt: str
     image_type: str
-    base64_data: str
     size: int
-    metadata: Dict[str, any] = None
+    metadata: Dict[str, object] = field(default_factory=dict)
+    base64_data: str = ""
 
 
 class ImageHandler:
@@ -40,22 +43,23 @@ async def process_image(
         self, photo: PhotoSize, caption: Optional[str] = None
     ) -> ProcessedImage:
         """Process uploaded image — save to temp file and build a path-based prompt."""
-        import uuid
-
         # Download image bytes
         file = await photo.get_file()
-        image_bytes = await file.download_as_bytearray()
+        image_bytes = bytes(await file.download_as_bytearray())
 
         # Detect format and save to temp file so Claude CLI can read it
-        fmt = self._detect_format(bytes(image_bytes))
-        ext = fmt if fmt != "unknown" else "jpg"
-        temp_dir = Path("/tmp/claude_bot_files")
-        temp_dir.mkdir(exist_ok=True)
-        image_path = temp_dir / f"image_{uuid.uuid4()}.{ext}"
-        image_path.write_bytes(bytes(image_bytes))
+        fmt = self._detect_format(image_bytes)
+        if fmt == "unknown":
+            raise ValueError(
+                "Unsupported image format. Please upload a PNG, JPEG, GIF, or WebP image."
+            )
+        _TEMP_DIR.mkdir(mode=0o700, exist_ok=True)
+        image_path = _TEMP_DIR / f"image_{uuid.uuid4()}.{fmt}"
+        image_path.write_bytes(image_bytes)
+        image_path.chmod(0o600)
 
         # Detect image type for prompt tailoring
-        image_type = self._detect_image_type(bytes(image_bytes))
+        image_type = self._detect_image_type(image_bytes)
 
         # Build prompt with actual file path so Claude CLI can see the image
         if image_type == "screenshot":
@@ -67,17 +71,18 @@ async def process_image(
         else:
             prompt = self._create_generic_prompt(caption, image_path)
 
+        # Retained for future multimodal SDK support — not currently used
         base64_image = base64.b64encode(image_bytes).decode("utf-8")
         return ProcessedImage(
             prompt=prompt,
             image_type=image_type,
-            base64_data=base64_image,
             size=len(image_bytes),
             metadata={
                 "format": fmt,
                 "has_caption": caption is not None,
                 "temp_path": str(image_path),
             },
+            base64_data=base64_image,
         )
 
     def _detect_image_type(self, image_bytes: bytes) -> str:
@@ -106,28 +111,28 @@ def _create_screenshot_prompt(
         self, caption: Optional[str], image_path: Path
     ) -> str:
         """Create prompt for screenshot analysis"""
-        base = f"I'm sharing a screenshot with you. The image is saved at: {image_path}\n\nPlease analyze it and help me with:\n1. Identifying what application or website this is from\n2. Understanding the UI elements and their purpose\n3. Any issues or improvements you notice\n4. Answering any specific questions I have\n"
+        base = f"I'm sharing a screenshot with you. Please read the image file at this path using your Read tool: {image_path}\n\nPlease analyze it and help me with:\n1. Identifying what application or website this is from\n2. Understanding the UI elements and their purpose\n3. Any issues or improvements you notice\n4. Answering any specific questions I have\n"
         if caption:
             base += f"\nSpecific request: {caption}"
         return base
 
     def _create_diagram_prompt(self, caption: Optional[str], image_path: Path) -> str:
         """Create prompt for diagram analysis"""
-        base = f"I'm sharing a diagram with you. The image is saved at: {image_path}\n\nPlease help me:\n1. Understand the components and their relationships\n2. Identify the type of diagram\n3. Explain any technical concepts shown\n4. Suggest improvements or clarifications\n"
+        base = f"I'm sharing a diagram with you. Please read the image file at this path using your Read tool: {image_path}\n\nPlease help me:\n1. Understand the components and their relationships\n2. Identify the type of diagram\n3. Explain any technical concepts shown\n4. Suggest improvements or clarifications\n"
         if caption:
             base += f"\nSpecific request: {caption}"
         return base
 
     def _create_ui_prompt(self, caption: Optional[str], image_path: Path) -> str:
         """Create prompt for UI mockup analysis"""
-        base = f"I'm sharing a UI mockup with you. The image is saved at: {image_path}\n\nPlease analyze:\n1. The layout and visual hierarchy\n2. UX improvements\n3. Accessibility concerns\n"
+        base = f"I'm sharing a UI mockup with you. Please read the image file at this path using your Read tool: {image_path}\n\nPlease analyze:\n1. The layout and visual hierarchy\n2. UX improvements\n3. Accessibility concerns\n"
         if caption:
             base += f"\nSpecific request: {caption}"
         return base
 
     def _create_generic_prompt(self, caption: Optional[str], image_path: Path) -> str:
         """Create generic image analysis prompt"""
-        base = f"I'm sharing an image with you. The image is saved at: {image_path}\n\nPlease analyze and describe what you see.\n"
+        base = f"I'm sharing an image with you. Please read the image file at this path using your Read tool: {image_path}\n\nPlease analyze and describe what you see.\n"
         if caption:
             base += f"\nSpecific request: {caption}"
         return base
diff --git a/src/bot/orchestrator.py b/src/bot/orchestrator.py
index ac1d5304..9ff0b81c 100644
--- a/src/bot/orchestrator.py
+++ b/src/bot/orchestrator.py
@@ -1276,6 +1276,7 @@ async def agentic_photo(
         await chat.send_action("typing")
         progress_msg = await update.message.reply_text("Working...")
 
+        processed_image = None
         try:
             photo = update.message.photo[-1]
             processed_image = await image_handler.process_image(
@@ -1297,6 +1298,11 @@ async def agentic_photo(
             logger.error(
                 "Claude photo processing failed", error=str(e), user_id=user_id
             )
+        finally:
+            if processed_image is not None:
+                temp_path = processed_image.metadata.get("temp_path")
+                if temp_path:
+                    Path(temp_path).unlink(missing_ok=True)
 
     async def agentic_voice(
         self, update: Update, context: ContextTypes.DEFAULT_TYPE

From 48612f295fa070b48247a09cbf55f5757197021e Mon Sep 17 00:00:00 2001
From: Hari Patel <patelhariv18@gmail.com>
Date: Sun, 8 Mar 2026 10:30:09 +0530
Subject: [PATCH 3/3] fix: add partial-write cleanup and type hints on prompt
 builders

- Wrap write_bytes + chmod in try/except to unlink partial file on failure
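- Add type hints to all four _create_*_prompt methods (CLAUDE.md compliance)
  (note: the diff below contains only the write_bytes/chmod hunk; the four
  prompt-builder signatures were already annotated in PATCH 1/3)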
---
 src/bot/features/image_handler.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/bot/features/image_handler.py b/src/bot/features/image_handler.py
index 94672444..5b6397b9 100644
--- a/src/bot/features/image_handler.py
+++ b/src/bot/features/image_handler.py
@@ -55,8 +55,12 @@ async def process_image(
             )
         _TEMP_DIR.mkdir(mode=0o700, exist_ok=True)
         image_path = _TEMP_DIR / f"image_{uuid.uuid4()}.{fmt}"
-        image_path.write_bytes(image_bytes)
-        image_path.chmod(0o600)
+        try:
+            image_path.write_bytes(image_bytes)
+            image_path.chmod(0o600)
+        except Exception:
+            image_path.unlink(missing_ok=True)
+            raise
 
         # Detect image type for prompt tailoring
         image_type = self._detect_image_type(image_bytes)