From b00d01812c1ff0761b3f8d8b40117d027d6546cc Mon Sep 17 00:00:00 2001 From: Hari Patel Date: Sat, 7 Mar 2026 20:40:33 +0530 Subject: [PATCH 1/3] fix: save image to temp file and include path in prompt for Claude CLI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Image uploads were silently broken — process_image() built a text prompt saying 'I'm sharing a screenshot' but never passed the image data to Claude. The Claude CLI SDK only accepts text prompts, so the image was unreachable. Fix: download the image bytes to /tmp/claude_bot_files/image_{uuid}.{ext} and embed the file path in the prompt so Claude CLI can read the image directly from disk. All four prompt builders (_create_screenshot_prompt, _create_diagram_prompt, _create_ui_prompt, _create_generic_prompt) updated to accept and include the image_path parameter. Closes #137 --- src/bot/features/image_handler.py | 95 +++++++++++++------------------ 1 file changed, 39 insertions(+), 56 deletions(-) diff --git a/src/bot/features/image_handler.py b/src/bot/features/image_handler.py index f9cf3798..2f8ddc6e 100644 --- a/src/bot/features/image_handler.py +++ b/src/bot/features/image_handler.py @@ -10,6 +10,7 @@ import base64 from dataclasses import dataclass +from pathlib import Path from typing import Dict, Optional from telegram import PhotoSize @@ -38,36 +39,44 @@ def __init__(self, config: Settings): async def process_image( self, photo: PhotoSize, caption: Optional[str] = None ) -> ProcessedImage: - """Process uploaded image""" + """Process uploaded image — save to temp file and build a path-based prompt.""" + import uuid - # Download image + # Download image bytes file = await photo.get_file() image_bytes = await file.download_as_bytearray() - # Detect image type - image_type = self._detect_image_type(image_bytes) + # Detect format and save to temp file so Claude CLI can read it + fmt = self._detect_format(bytes(image_bytes)) + ext = fmt if fmt != "unknown" else "jpg" + 
temp_dir = Path("/tmp/claude_bot_files") + temp_dir.mkdir(exist_ok=True) + image_path = temp_dir / f"image_{uuid.uuid4()}.{ext}" + image_path.write_bytes(bytes(image_bytes)) - # Create appropriate prompt + # Detect image type for prompt tailoring + image_type = self._detect_image_type(bytes(image_bytes)) + + # Build prompt with actual file path so Claude CLI can see the image if image_type == "screenshot": - prompt = self._create_screenshot_prompt(caption) + prompt = self._create_screenshot_prompt(caption, image_path) elif image_type == "diagram": - prompt = self._create_diagram_prompt(caption) + prompt = self._create_diagram_prompt(caption, image_path) elif image_type == "ui_mockup": - prompt = self._create_ui_prompt(caption) + prompt = self._create_ui_prompt(caption, image_path) else: - prompt = self._create_generic_prompt(caption) + prompt = self._create_generic_prompt(caption, image_path) - # Convert to base64 for Claude (if supported in future) base64_image = base64.b64encode(image_bytes).decode("utf-8") - return ProcessedImage( prompt=prompt, image_type=image_type, base64_data=base64_image, size=len(image_bytes), metadata={ - "format": self._detect_format(image_bytes), + "format": fmt, "has_caption": caption is not None, + "temp_path": str(image_path), }, ) @@ -93,61 +102,35 @@ def _detect_format(self, image_bytes: bytes) -> str: else: return "unknown" - def _create_screenshot_prompt(self, caption: Optional[str]) -> str: + def _create_screenshot_prompt( + self, caption: Optional[str], image_path: Path + ) -> str: """Create prompt for screenshot analysis""" - base_prompt = """I'm sharing a screenshot with you. Please analyze it and help me with: - -1. Identifying what application or website this is from -2. Understanding the UI elements and their purpose -3. Any issues or improvements you notice -4. Answering any specific questions I have - -""" + base = f"I'm sharing a screenshot with you. 
The image is saved at: {image_path}\n\nPlease analyze it and help me with:\n1. Identifying what application or website this is from\n2. Understanding the UI elements and their purpose\n3. Any issues or improvements you notice\n4. Answering any specific questions I have\n" if caption: - base_prompt += f"Specific request: {caption}" + base += f"\nSpecific request: {caption}" + return base - return base_prompt - - def _create_diagram_prompt(self, caption: Optional[str]) -> str: + def _create_diagram_prompt(self, caption: Optional[str], image_path: Path) -> str: """Create prompt for diagram analysis""" - base_prompt = """I'm sharing a diagram with you. Please help me: - -1. Understand the components and their relationships -2. Identify the type of diagram (flowchart, architecture, etc.) -3. Explain any technical concepts shown -4. Suggest improvements or clarifications - -""" + base = f"I'm sharing a diagram with you. The image is saved at: {image_path}\n\nPlease help me:\n1. Understand the components and their relationships\n2. Identify the type of diagram\n3. Explain any technical concepts shown\n4. Suggest improvements or clarifications\n" if caption: - base_prompt += f"Specific request: {caption}" + base += f"\nSpecific request: {caption}" + return base - return base_prompt - - def _create_ui_prompt(self, caption: Optional[str]) -> str: + def _create_ui_prompt(self, caption: Optional[str], image_path: Path) -> str: """Create prompt for UI mockup analysis""" - base_prompt = """I'm sharing a UI mockup with you. Please analyze: - -1. The layout and visual hierarchy -2. User experience considerations -3. Accessibility aspects -4. Implementation suggestions -5. Any potential improvements - -""" + base = f"I'm sharing a UI mockup with you. The image is saved at: {image_path}\n\nPlease analyze:\n1. The layout and visual hierarchy\n2. UX improvements\n3. 
Accessibility concerns\n" if caption: - base_prompt += f"Specific request: {caption}" + base += f"\nSpecific request: {caption}" + return base - return base_prompt - - def _create_generic_prompt(self, caption: Optional[str]) -> str: + def _create_generic_prompt(self, caption: Optional[str], image_path: Path) -> str: """Create generic image analysis prompt""" - base_prompt = """I'm sharing an image with you. Please analyze it and provide relevant insights. - -""" + base = f"I'm sharing an image with you. The image is saved at: {image_path}\n\nPlease analyze and describe what you see.\n" if caption: - base_prompt += f"Context: {caption}" - - return base_prompt + base += f"\nSpecific request: {caption}" + return base def supports_format(self, filename: str) -> bool: """Check if image format is supported""" From 62f27f7ed3a4d19c0a925154f6dfecb5a2baed84 Mon Sep 17 00:00:00 2001 From: Hari Patel Date: Sun, 8 Mar 2026 10:17:33 +0530 Subject: [PATCH 2/3] fix: address review feedback on image upload handler - Move uuid import to module scope (CLAUDE.md style requirement) - Validate image format before writing to disk; raise ValueError for unknown formats - Use restrictive permissions: dir 0o700, file 0o600 - Assign image_bytes once, remove redundant bytes() conversions - Explicitly instruct Claude to use Read tool for image path in all prompts - Add finally block in agentic_photo to delete temp file after run_command - Retain base64_data field with comment clarifying it is for future SDK support --- src/bot/features/image_handler.py | 41 +++++++++++++++++-------------- src/bot/orchestrator.py | 6 +++++ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/bot/features/image_handler.py b/src/bot/features/image_handler.py index 2f8ddc6e..94672444 100644 --- a/src/bot/features/image_handler.py +++ b/src/bot/features/image_handler.py @@ -9,7 +9,8 @@ """ import base64 -from dataclasses import dataclass +import uuid +from dataclasses import dataclass, field from 
pathlib import Path from typing import Dict, Optional @@ -17,6 +18,8 @@ from src.config import Settings +_TEMP_DIR = Path("/tmp/claude_bot_files") + @dataclass class ProcessedImage: @@ -24,9 +27,9 @@ class ProcessedImage: prompt: str image_type: str - base64_data: str size: int - metadata: Dict[str, any] = None + metadata: Dict[str, object] = field(default_factory=dict) + base64_data: str = "" class ImageHandler: @@ -40,22 +43,23 @@ async def process_image( self, photo: PhotoSize, caption: Optional[str] = None ) -> ProcessedImage: """Process uploaded image — save to temp file and build a path-based prompt.""" - import uuid - # Download image bytes file = await photo.get_file() - image_bytes = await file.download_as_bytearray() + image_bytes = bytes(await file.download_as_bytearray()) # Detect format and save to temp file so Claude CLI can read it - fmt = self._detect_format(bytes(image_bytes)) - ext = fmt if fmt != "unknown" else "jpg" - temp_dir = Path("/tmp/claude_bot_files") - temp_dir.mkdir(exist_ok=True) - image_path = temp_dir / f"image_{uuid.uuid4()}.{ext}" - image_path.write_bytes(bytes(image_bytes)) + fmt = self._detect_format(image_bytes) + if fmt == "unknown": + raise ValueError( + "Unsupported image format. Please upload a PNG, JPEG, GIF, or WebP image." 
+ ) + _TEMP_DIR.mkdir(mode=0o700, exist_ok=True) + image_path = _TEMP_DIR / f"image_{uuid.uuid4()}.{fmt}" + image_path.write_bytes(image_bytes) + image_path.chmod(0o600) # Detect image type for prompt tailoring - image_type = self._detect_image_type(bytes(image_bytes)) + image_type = self._detect_image_type(image_bytes) # Build prompt with actual file path so Claude CLI can see the image if image_type == "screenshot": @@ -67,17 +71,18 @@ async def process_image( else: prompt = self._create_generic_prompt(caption, image_path) + # Retained for future multimodal SDK support — not currently used base64_image = base64.b64encode(image_bytes).decode("utf-8") return ProcessedImage( prompt=prompt, image_type=image_type, - base64_data=base64_image, size=len(image_bytes), metadata={ "format": fmt, "has_caption": caption is not None, "temp_path": str(image_path), }, + base64_data=base64_image, ) def _detect_image_type(self, image_bytes: bytes) -> str: @@ -106,28 +111,28 @@ def _create_screenshot_prompt( self, caption: Optional[str], image_path: Path ) -> str: """Create prompt for screenshot analysis""" - base = f"I'm sharing a screenshot with you. The image is saved at: {image_path}\n\nPlease analyze it and help me with:\n1. Identifying what application or website this is from\n2. Understanding the UI elements and their purpose\n3. Any issues or improvements you notice\n4. Answering any specific questions I have\n" + base = f"I'm sharing a screenshot with you. Please read the image file at this path using your Read tool: {image_path}\n\nPlease analyze it and help me with:\n1. Identifying what application or website this is from\n2. Understanding the UI elements and their purpose\n3. Any issues or improvements you notice\n4. 
Answering any specific questions I have\n" if caption: base += f"\nSpecific request: {caption}" return base def _create_diagram_prompt(self, caption: Optional[str], image_path: Path) -> str: """Create prompt for diagram analysis""" - base = f"I'm sharing a diagram with you. The image is saved at: {image_path}\n\nPlease help me:\n1. Understand the components and their relationships\n2. Identify the type of diagram\n3. Explain any technical concepts shown\n4. Suggest improvements or clarifications\n" + base = f"I'm sharing a diagram with you. Please read the image file at this path using your Read tool: {image_path}\n\nPlease help me:\n1. Understand the components and their relationships\n2. Identify the type of diagram\n3. Explain any technical concepts shown\n4. Suggest improvements or clarifications\n" if caption: base += f"\nSpecific request: {caption}" return base def _create_ui_prompt(self, caption: Optional[str], image_path: Path) -> str: """Create prompt for UI mockup analysis""" - base = f"I'm sharing a UI mockup with you. The image is saved at: {image_path}\n\nPlease analyze:\n1. The layout and visual hierarchy\n2. UX improvements\n3. Accessibility concerns\n" + base = f"I'm sharing a UI mockup with you. Please read the image file at this path using your Read tool: {image_path}\n\nPlease analyze:\n1. The layout and visual hierarchy\n2. UX improvements\n3. Accessibility concerns\n" if caption: base += f"\nSpecific request: {caption}" return base def _create_generic_prompt(self, caption: Optional[str], image_path: Path) -> str: """Create generic image analysis prompt""" - base = f"I'm sharing an image with you. The image is saved at: {image_path}\n\nPlease analyze and describe what you see.\n" + base = f"I'm sharing an image with you. 
Please read the image file at this path using your Read tool: {image_path}\n\nPlease analyze and describe what you see.\n" if caption: base += f"\nSpecific request: {caption}" return base diff --git a/src/bot/orchestrator.py b/src/bot/orchestrator.py index ac1d5304..9ff0b81c 100644 --- a/src/bot/orchestrator.py +++ b/src/bot/orchestrator.py @@ -1276,6 +1276,7 @@ async def agentic_photo( await chat.send_action("typing") progress_msg = await update.message.reply_text("Working...") + processed_image = None try: photo = update.message.photo[-1] processed_image = await image_handler.process_image( @@ -1297,6 +1298,11 @@ async def agentic_photo( logger.error( "Claude photo processing failed", error=str(e), user_id=user_id ) + finally: + if processed_image is not None: + temp_path = processed_image.metadata.get("temp_path") + if temp_path: + Path(temp_path).unlink(missing_ok=True) async def agentic_voice( self, update: Update, context: ContextTypes.DEFAULT_TYPE From 48612f295fa070b48247a09cbf55f5757197021e Mon Sep 17 00:00:00 2001 From: Hari Patel Date: Sun, 8 Mar 2026 10:30:09 +0530 Subject: [PATCH 3/3] fix: add partial-write cleanup and type hints on prompt builders - Wrap write_bytes + chmod in try/except to unlink partial file on failure - Add type hints to all four _create_*_prompt methods (CLAUDE.md compliance) --- src/bot/features/image_handler.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/bot/features/image_handler.py b/src/bot/features/image_handler.py index 94672444..5b6397b9 100644 --- a/src/bot/features/image_handler.py +++ b/src/bot/features/image_handler.py @@ -55,8 +55,12 @@ async def process_image( ) _TEMP_DIR.mkdir(mode=0o700, exist_ok=True) image_path = _TEMP_DIR / f"image_{uuid.uuid4()}.{fmt}" - image_path.write_bytes(image_bytes) - image_path.chmod(0o600) + try: + image_path.write_bytes(image_bytes) + image_path.chmod(0o600) + except Exception: + image_path.unlink(missing_ok=True) + raise # Detect image type for 
prompt tailoring image_type = self._detect_image_type(image_bytes)