From 69ee15bd6388a8e5fd9e0c0f7d3cd89d2eba2093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B3n=20Levy?= Date: Wed, 26 Nov 2025 15:09:45 +0000 Subject: [PATCH 1/8] fix(agentcore): simplify agentcore streaming --- .../bedrock/chat/agentcore/sse_iterator.py | 252 ---------------- .../bedrock/chat/agentcore/transformation.py | 281 ++++++++++++++---- requirements.txt | 4 +- .../llm_translation/test_bedrock_agentcore.py | 207 ++++++++++++- 4 files changed, 431 insertions(+), 313 deletions(-) delete mode 100644 litellm/llms/bedrock/chat/agentcore/sse_iterator.py diff --git a/litellm/llms/bedrock/chat/agentcore/sse_iterator.py b/litellm/llms/bedrock/chat/agentcore/sse_iterator.py deleted file mode 100644 index 90c5ada769f0..000000000000 --- a/litellm/llms/bedrock/chat/agentcore/sse_iterator.py +++ /dev/null @@ -1,252 +0,0 @@ -""" -SSE Stream Iterator for Bedrock AgentCore. - -Handles Server-Sent Events (SSE) streaming responses from AgentCore. -""" - -import json -from typing import TYPE_CHECKING, Any, Optional - -import httpx - -from litellm._logging import verbose_logger -from litellm._uuid import uuid -from litellm.types.llms.bedrock_agentcore import AgentCoreUsage -from litellm.types.utils import Delta, ModelResponse, StreamingChoices, Usage - -if TYPE_CHECKING: - pass - - -class AgentCoreSSEStreamIterator: - """ - Iterator for AgentCore SSE streaming responses. - Supports both sync and async iteration. - - CRITICAL: The line iterators are created lazily on first access and reused. - We must NOT create new iterators in __aiter__/__iter__ because - CustomStreamWrapper calls __aiter__ on every call to its __anext__, - which would create new iterators and cause StreamConsumed errors. - """ - - def __init__(self, response: httpx.Response, model: str): - self.response = response - self.model = model - self.finished = False - self._sync_iter: Any = None - self._async_iter: Any = None - self._sync_iter_initialized = False - self._async_iter_initialized = False - - def __iter__(self): - """Initialize sync iteration - create iterator lazily on first call only.""" - if not self._sync_iter_initialized: - self._sync_iter = iter(self.response.iter_lines()) - self._sync_iter_initialized = True - return self - - def __aiter__(self): - """Initialize async iteration - create iterator lazily on first call only.""" - if not self._async_iter_initialized: - self._async_iter = self.response.aiter_lines().__aiter__() - self._async_iter_initialized = True - return self - - def _parse_sse_line(self, line: str) -> Optional[ModelResponse]: - """ - Parse a single SSE line and return a ModelResponse chunk if applicable. 
- - AgentCore SSE format: - - data: {"event": {"contentBlockDelta": {"delta": {"text": "..."}}}} - - data: {"event": {"metadata": {"usage": {...}}}} - - data: {"message": {...}} - """ - line = line.strip() - if not line or not line.startswith("data:"): - return None - - json_str = line[5:].strip() - if not json_str: - return None - - try: - data = json.loads(json_str) - - # Skip non-dict data (some lines contain Python repr strings) - if not isinstance(data, dict): - return None - - # Process content delta events - if "event" in data and isinstance(data["event"], dict): - event_payload = data["event"] - content_block_delta = event_payload.get("contentBlockDelta") - - if content_block_delta: - delta = content_block_delta.get("delta", {}) - text = delta.get("text", "") - - if text: - # Return chunk with text - chunk = ModelResponse( - id=f"chatcmpl-{uuid.uuid4()}", - created=0, - model=self.model, - object="chat.completion.chunk", - ) - - chunk.choices = [ - StreamingChoices( - finish_reason=None, - index=0, - delta=Delta(content=text, role="assistant"), - ) - ] - - return chunk - - # Check for metadata/usage - this signals the end - metadata = event_payload.get("metadata") - if metadata and "usage" in metadata: - chunk = ModelResponse( - id=f"chatcmpl-{uuid.uuid4()}", - created=0, - model=self.model, - object="chat.completion.chunk", - ) - - chunk.choices = [ - StreamingChoices( - finish_reason="stop", - index=0, - delta=Delta(), - ) - ] - - usage_data: AgentCoreUsage = metadata["usage"] # type: ignore - setattr( - chunk, - "usage", - Usage( - prompt_tokens=usage_data.get("inputTokens", 0), - completion_tokens=usage_data.get("outputTokens", 0), - total_tokens=usage_data.get("totalTokens", 0), - ), - ) - - self.finished = True - return chunk - - # Check for final message (alternative finish signal) - if "message" in data and isinstance(data["message"], dict): - if not self.finished: - chunk = ModelResponse( - id=f"chatcmpl-{uuid.uuid4()}", - created=0, - model=self.model, - object="chat.completion.chunk", - ) - - chunk.choices = [ - StreamingChoices( - finish_reason="stop", - index=0, - delta=Delta(), - ) - ] - - self.finished = True - return chunk - - except json.JSONDecodeError: - verbose_logger.debug(f"Skipping non-JSON SSE line: {line[:100]}") - - return None - - def _create_final_chunk(self) -> ModelResponse: - """Create a final chunk to signal stream completion.""" - chunk = ModelResponse( - id=f"chatcmpl-{uuid.uuid4()}", - created=0, - model=self.model, - object="chat.completion.chunk", - ) - - chunk.choices = [ - StreamingChoices( - finish_reason="stop", - index=0, - delta=Delta(), - ) - ] - - return chunk - - def __next__(self) -> ModelResponse: - """ - Sync iteration - parse SSE events and yield ModelResponse chunks. - - Uses next() on the stored iterator to properly resume between calls. 
- """ - try: - if self._sync_iter is None: - raise StopIteration - - # Keep getting lines until we have a result to return - while True: - try: - line = next(self._sync_iter) - except StopIteration: - # Stream ended - send final chunk if not already finished - if not self.finished: - self.finished = True - return self._create_final_chunk() - raise - - result = self._parse_sse_line(line) - if result is not None: - return result - - except StopIteration: - raise - except httpx.StreamConsumed: - raise StopIteration - except httpx.StreamClosed: - raise StopIteration - except Exception as e: - verbose_logger.error(f"Error in AgentCore SSE stream: {str(e)}") - raise StopIteration - - async def __anext__(self) -> ModelResponse: - """ - Async iteration - parse SSE events and yield ModelResponse chunks. - - Uses __anext__() on the stored iterator to properly resume between calls. - """ - try: - if self._async_iter is None: - raise StopAsyncIteration - - # Keep getting lines until we have a result to return - while True: - try: - line = await self._async_iter.__anext__() - except StopAsyncIteration: - # Stream ended - send final chunk if not already finished - if not self.finished: - self.finished = True - return self._create_final_chunk() - raise - - result = self._parse_sse_line(line) - if result is not None: - return result - - except StopAsyncIteration: - raise - except httpx.StreamConsumed: - raise StopAsyncIteration - except httpx.StreamClosed: - raise StopAsyncIteration - except Exception as e: - verbose_logger.error(f"Error in AgentCore SSE stream: {str(e)}") - raise StopAsyncIteration diff --git a/litellm/llms/bedrock/chat/agentcore/transformation.py b/litellm/llms/bedrock/chat/agentcore/transformation.py index 7c65cad94df0..c04e416cfc8c 100644 --- a/litellm/llms/bedrock/chat/agentcore/transformation.py +++ b/litellm/llms/bedrock/chat/agentcore/transformation.py @@ -5,6 +5,7 @@ """ import json +from collections.abc import AsyncGenerator from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast from urllib.parse import quote @@ -17,7 +18,6 @@ ) from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException from litellm.llms.bedrock.base_aws_llm import BaseAWSLLM -from litellm.llms.bedrock.chat.agentcore.sse_iterator import AgentCoreSSEStreamIterator from litellm.llms.bedrock.common_utils import BedrockError from litellm.types.llms.bedrock_agentcore import ( AgentCoreMessage, @@ -25,7 +25,7 @@ AgentCoreUsage, ) from litellm.types.llms.openai import AllMessageValues -from litellm.types.utils import Choices, Message, ModelResponse, Usage +from litellm.types.utils import Choices, Delta, Message, ModelResponse, StreamingChoices, Usage if TYPE_CHECKING: from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj @@ -116,7 +116,8 @@ def sign_request( fake_stream: Optional[bool] = None, ) -> Tuple[dict, Optional[bytes]]: # Check if api_key (bearer token) is provided for Cognito authentication - jwt_token = optional_params.get("api_key") + # Priority: api_key parameter first, then optional_params + jwt_token = api_key or optional_params.get("api_key") if jwt_token: verbose_logger.debug( f"AgentCore: Using Bearer token authentication (Cognito/JWT) - token: {jwt_token[:50]}..." 
@@ -437,22 +438,104 @@ def _parse_sse_stream(self, response_text: str) -> AgentCoreParsedResponse: content=content, usage=usage_data, final_message=final_message ) - def get_streaming_response( + def _stream_agentcore_response_sync( self, + response: httpx.Response, model: str, - raw_response: httpx.Response, - ) -> AgentCoreSSEStreamIterator: - """ - Return a streaming iterator for SSE responses. - - Args: - model: The model name - raw_response: Raw HTTP response with streaming data - - Returns: - AgentCoreSSEStreamIterator: Iterator that yields ModelResponse chunks - """ - return AgentCoreSSEStreamIterator(response=raw_response, model=model) + ): + """ + Internal sync generator that parses SSE and yields ModelResponse chunks. + """ + buffer = "" + for text_chunk in response.iter_text(): + buffer += text_chunk + + # Process complete lines + while '\n' in buffer: + line, buffer = buffer.split('\n', 1) + line = line.strip() + + if not line or not line.startswith('data:'): + continue + + json_str = line[5:].strip() + if not json_str: + continue + + try: + data_obj = json.loads(json_str) + if not isinstance(data_obj, dict): + continue + + # Process contentBlockDelta events + if "event" in data_obj and isinstance(data_obj["event"], dict): + event_payload = data_obj["event"] + content_block_delta = event_payload.get("contentBlockDelta") + + if content_block_delta: + delta = content_block_delta.get("delta", {}) + text = delta.get("text", "") + + if text: + chunk = ModelResponse( + id=f"chatcmpl-{uuid.uuid4()}", + created=0, + model=model, + object="chat.completion.chunk", + ) + chunk.choices = [ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta(content=text, role="assistant"), + ) + ] + yield chunk + + # Process metadata/usage + metadata = event_payload.get("metadata") + if metadata and "usage" in metadata: + chunk = ModelResponse( + id=f"chatcmpl-{uuid.uuid4()}", + created=0, + model=model, + object="chat.completion.chunk", + ) + chunk.choices = [ + StreamingChoices( + finish_reason="stop", + index=0, + delta=Delta(), + ) + ] + usage_data: AgentCoreUsage = metadata["usage"] # type: ignore + setattr(chunk, "usage", Usage( + prompt_tokens=usage_data.get("inputTokens", 0), + completion_tokens=usage_data.get("outputTokens", 0), + total_tokens=usage_data.get("totalTokens", 0), + )) + yield chunk + + # Process final message + if "message" in data_obj and isinstance(data_obj["message"], dict): + chunk = ModelResponse( + id=f"chatcmpl-{uuid.uuid4()}", + created=0, + model=model, + object="chat.completion.chunk", + ) + chunk.choices = [ + StreamingChoices( + finish_reason="stop", + index=0, + delta=Delta(), + ) + ] + yield chunk + + except json.JSONDecodeError: + verbose_logger.debug(f"Skipping non-JSON SSE line: {line[:100]}") + continue def get_sync_custom_stream_wrapper( self, @@ -466,17 +549,14 @@ def get_sync_custom_stream_wrapper( client: Optional[Union[HTTPHandler, "AsyncHTTPHandler"]] = None, json_mode: Optional[bool] = None, signed_json_body: Optional[bytes] = None, - ) -> CustomStreamWrapper: + ) -> "CustomStreamWrapper": """ - Get a CustomStreamWrapper for synchronous streaming. - - This is called when stream=True is passed to completion(). + Simplified sync streaming - returns a generator that yields ModelResponse chunks. 
""" from litellm.llms.custom_httpx.http_handler import ( HTTPHandler, _get_httpx_client, ) - from litellm.utils import CustomStreamWrapper if client is None or not isinstance(client, HTTPHandler): client = _get_httpx_client(params={}) @@ -488,7 +568,7 @@ def get_sync_custom_stream_wrapper( api_base, headers=headers, data=signed_json_body if signed_json_body else json.dumps(data), - stream=True, # THIS IS KEY - tells httpx to not buffer + stream=True, logging_obj=logging_obj, ) @@ -497,18 +577,6 @@ def get_sync_custom_stream_wrapper( status_code=response.status_code, message=str(response.read()) ) - # Create iterator for SSE stream - completion_stream = self.get_streaming_response( - model=model, raw_response=response - ) - - streaming_response = CustomStreamWrapper( - completion_stream=completion_stream, - model=model, - custom_llm_provider=custom_llm_provider, - logging_obj=logging_obj, - ) - # LOGGING logging_obj.post_call( input=messages, @@ -517,7 +585,114 @@ def get_sync_custom_stream_wrapper( additional_args={"complete_input_dict": data}, ) - return streaming_response + # Wrap the generator in CustomStreamWrapper + from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper + + return CustomStreamWrapper( + completion_stream=self._stream_agentcore_response_sync(response, model), + model=model, + custom_llm_provider="bedrock", + logging_obj=logging_obj, + ) + + async def _stream_agentcore_response( + self, + response: httpx.Response, + model: str, + ) -> AsyncGenerator[ModelResponse, None]: + """ + Internal async generator that parses SSE and yields ModelResponse chunks. + """ + buffer = "" + async for text_chunk in response.aiter_text(): + buffer += text_chunk + + # Process complete lines + while '\n' in buffer: + line, buffer = buffer.split('\n', 1) + line = line.strip() + + if not line or not line.startswith('data:'): + continue + + json_str = line[5:].strip() + if not json_str: + continue + + try: + data_obj = json.loads(json_str) + if not isinstance(data_obj, dict): + continue + + # Process contentBlockDelta events + if "event" in data_obj and isinstance(data_obj["event"], dict): + event_payload = data_obj["event"] + content_block_delta = event_payload.get("contentBlockDelta") + + if content_block_delta: + delta = content_block_delta.get("delta", {}) + text = delta.get("text", "") + + if text: + chunk = ModelResponse( + id=f"chatcmpl-{uuid.uuid4()}", + created=0, + model=model, + object="chat.completion.chunk", + ) + chunk.choices = [ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta(content=text, role="assistant"), + ) + ] + yield chunk + + # Process metadata/usage + metadata = event_payload.get("metadata") + if metadata and "usage" in metadata: + chunk = ModelResponse( + id=f"chatcmpl-{uuid.uuid4()}", + created=0, + model=model, + object="chat.completion.chunk", + ) + chunk.choices = [ + StreamingChoices( + finish_reason="stop", + index=0, + delta=Delta(), + ) + ] + usage_data: AgentCoreUsage = metadata["usage"] # type: ignore + setattr(chunk, "usage", Usage( + prompt_tokens=usage_data.get("inputTokens", 0), + completion_tokens=usage_data.get("outputTokens", 0), + total_tokens=usage_data.get("totalTokens", 0), + )) + yield chunk + + # Process final message + if "message" in data_obj and isinstance(data_obj["message"], dict): + chunk = ModelResponse( + id=f"chatcmpl-{uuid.uuid4()}", + created=0, + model=model, + object="chat.completion.chunk", + ) + chunk.choices = [ + StreamingChoices( + finish_reason="stop", + index=0, + delta=Delta(), + ) + ] 
+ yield chunk + + except json.JSONDecodeError: + verbose_logger.debug(f"Skipping non-JSON SSE line: {line[:100]}") + continue async def get_async_custom_stream_wrapper( self, @@ -531,17 +706,14 @@ async def get_async_custom_stream_wrapper( client: Optional["AsyncHTTPHandler"] = None, json_mode: Optional[bool] = None, signed_json_body: Optional[bytes] = None, - ) -> CustomStreamWrapper: + ) -> "CustomStreamWrapper": """ - Get a CustomStreamWrapper for asynchronous streaming. - - This is called when stream=True is passed to acompletion(). + Simplified async streaming - returns an async generator that yields ModelResponse chunks. """ from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, get_async_httpx_client, ) - from litellm.utils import CustomStreamWrapper if client is None or not isinstance(client, AsyncHTTPHandler): client = get_async_httpx_client( @@ -555,7 +727,7 @@ async def get_async_custom_stream_wrapper( api_base, headers=headers, data=signed_json_body if signed_json_body else json.dumps(data), - stream=True, # THIS IS KEY - tells httpx to not buffer + stream=True, logging_obj=logging_obj, ) @@ -564,18 +736,6 @@ async def get_async_custom_stream_wrapper( status_code=response.status_code, message=str(await response.aread()) ) - # Create iterator for SSE stream - completion_stream = self.get_streaming_response( - model=model, raw_response=response - ) - - streaming_response = CustomStreamWrapper( - completion_stream=completion_stream, - model=model, - custom_llm_provider=custom_llm_provider, - logging_obj=logging_obj, - ) - # LOGGING logging_obj.post_call( input=messages, @@ -584,7 +744,15 @@ async def get_async_custom_stream_wrapper( additional_args={"complete_input_dict": data}, ) - return streaming_response + # Wrap the async generator in CustomStreamWrapper + from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper + + return CustomStreamWrapper( + completion_stream=self._stream_agentcore_response(response, model), + model=model, + custom_llm_provider="bedrock", + logging_obj=logging_obj, + ) @property def has_custom_stream_wrapper(self) -> bool: @@ -692,4 +860,5 @@ def should_fake_stream( stream: Optional[bool], custom_llm_provider: Optional[str] = None, ) -> bool: - return True + # AgentCore supports true streaming - don't buffer + return False diff --git a/requirements.txt b/requirements.txt index 5f00a269a7c4..3e1c510ec046 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ uvicorn==0.31.1 # server dep gunicorn==23.0.0 # server dep fastuuid==0.13.5 # for uuid4 uvloop==0.21.0 # uvicorn dep, gives us much better performance under load -boto3==1.36.0 # aws bedrock/sagemaker calls +boto3==1.40.53 # aws bedrock/sagemaker calls (has bedrock-agentcore-control, compatible with aioboto3) redis==5.2.1 # redis caching prisma==0.11.0 # for db nodejs-wheel-binaries==24.12.0 ## required by prisma for migrations, prevents runtime download (updated from nodejs-bin for security fixes) @@ -58,7 +58,7 @@ click==8.1.7 # for proxy cli rich==13.7.1 # for litellm proxy cli jinja2==3.1.6 # for prompt templates aiohttp==3.13.3 # for network calls -aioboto3==13.4.0 # for async sagemaker calls +aioboto3==15.5.0 # for async sagemaker calls (updated to match boto3 1.40.73) tenacity==8.5.0 # for retrying requests, when litellm.num_retries set pydantic>=2.11,<3 # proxy + openai req. 
+ mcp jsonschema>=4.23.0,<5.0.0 # validating json schema - aligned with openapi-core + mcp diff --git a/tests/llm_translation/test_bedrock_agentcore.py b/tests/llm_translation/test_bedrock_agentcore.py index 3afb01482ac3..3ea5b9bac064 100644 --- a/tests/llm_translation/test_bedrock_agentcore.py +++ b/tests/llm_translation/test_bedrock_agentcore.py @@ -12,10 +12,9 @@ ) import litellm -from unittest.mock import MagicMock, patch -import pytest - +from unittest.mock import MagicMock, Mock, patch import pytest +import httpx @pytest.mark.parametrize( "model", [ @@ -367,3 +366,205 @@ def test_bedrock_agentcore_without_api_key_uses_sigv4(): assert "X-Amzn-Bedrock-AgentCore-Runtime-Session-Id" in headers assert headers["X-Amzn-Bedrock-AgentCore-Runtime-Session-Id"] == "sigv4-test-session" + +def test_agentcore_parse_json_response(): + """ + Unit test for JSON response parsing (non-streaming) + Verifies that content-type: application/json responses are parsed correctly + """ + from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig + + config = AmazonAgentCoreConfig() + + # Create a mock JSON response + mock_response = Mock(spec=httpx.Response) + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "result": { + "role": "assistant", + "content": [{"text": "Hello from JSON response"}] + } + } + + # Parse the response + parsed = config._get_parsed_response(mock_response) + + # Verify content extraction + assert parsed["content"] == "Hello from JSON response" + # JSON responses don't include usage data + assert parsed["usage"] is None + # Final message should be the result object + assert parsed["final_message"] == mock_response.json.return_value["result"] + + +def test_agentcore_parse_sse_response(): + """ + Unit test for SSE response parsing (streaming response consumed as text) + Verifies that text/event-stream responses are parsed correctly + """ + from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig + + config = AmazonAgentCoreConfig() + + # Create a mock SSE response with multiple events + sse_data = """data: {"event":{"contentBlockDelta":{"delta":{"text":"Hello "}}}} + +data: {"event":{"contentBlockDelta":{"delta":{"text":"from SSE"}}}} + +data: {"event":{"metadata":{"usage":{"inputTokens":10,"outputTokens":5,"totalTokens":15}}}} + +data: {"message":{"role":"assistant","content":[{"text":"Hello from SSE"}]}} +""" + + mock_response = Mock(spec=httpx.Response) + mock_response.headers = {"content-type": "text/event-stream"} + mock_response.text = sse_data + + # Parse the response + parsed = config._get_parsed_response(mock_response) + + # Verify content extraction from final message + assert parsed["content"] == "Hello from SSE" + # SSE responses can include usage data + assert parsed["usage"] is not None + assert parsed["usage"]["inputTokens"] == 10 + assert parsed["usage"]["outputTokens"] == 5 + assert parsed["usage"]["totalTokens"] == 15 + # Final message should be present + assert parsed["final_message"] is not None + assert parsed["final_message"]["role"] == "assistant" + + +def test_agentcore_parse_sse_response_without_final_message(): + """ + Unit test for SSE response parsing when only deltas are present (no final message) + """ + from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig + + config = AmazonAgentCoreConfig() + + # Create a mock SSE response with only content deltas + sse_data = """data: {"event":{"contentBlockDelta":{"delta":{"text":"First 
"}}}} + +data: {"event":{"contentBlockDelta":{"delta":{"text":"second "}}}} + +data: {"event":{"contentBlockDelta":{"delta":{"text":"third"}}}} +""" + + mock_response = Mock(spec=httpx.Response) + mock_response.headers = {"content-type": "text/event-stream"} + mock_response.text = sse_data + + # Parse the response + parsed = config._get_parsed_response(mock_response) + + # Content should be concatenated from deltas + assert parsed["content"] == "First second third" + # No final message + assert parsed["final_message"] is None + + +def test_agentcore_transform_response_json(): + """ + Integration test for transform_response with JSON response + Verifies end-to-end transformation of JSON responses to ModelResponse + """ + from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig + from litellm.types.utils import ModelResponse + + config = AmazonAgentCoreConfig() + + # Create mock JSON response + mock_response = Mock(spec=httpx.Response) + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "result": { + "role": "assistant", + "content": [{"text": "Response from transform_response"}] + } + } + mock_response.status_code = 200 + + # Create model response + model_response = ModelResponse() + + # Mock logging object + mock_logging = MagicMock() + + # Transform the response + result = config.transform_response( + model="bedrock/agentcore/arn:aws:bedrock-agentcore:us-west-2:123456789012:runtime/test", + raw_response=mock_response, + model_response=model_response, + logging_obj=mock_logging, + request_data={}, + messages=[{"role": "user", "content": "test"}], + optional_params={}, + litellm_params={}, + encoding=None, + ) + + # Verify ModelResponse structure + assert len(result.choices) == 1 + assert result.choices[0].message.content == "Response from transform_response" + assert result.choices[0].message.role == "assistant" + assert result.choices[0].finish_reason == "stop" + assert result.choices[0].index == 0 + + +def test_agentcore_transform_response_sse(): + """ + Integration test for transform_response with SSE response + Verifies end-to-end transformation of SSE responses to ModelResponse + """ + from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig + from litellm.types.utils import ModelResponse + + config = AmazonAgentCoreConfig() + + # Create mock SSE response + sse_data = """data: {"event":{"contentBlockDelta":{"delta":{"text":"SSE "}}}} + +data: {"event":{"contentBlockDelta":{"delta":{"text":"response"}}}} + +data: {"event":{"metadata":{"usage":{"inputTokens":20,"outputTokens":10,"totalTokens":30}}}} + +data: {"message":{"role":"assistant","content":[{"text":"SSE response"}]}} +""" + + mock_response = Mock(spec=httpx.Response) + mock_response.headers = {"content-type": "text/event-stream"} + mock_response.text = sse_data + mock_response.status_code = 200 + + # Create model response + model_response = ModelResponse() + + # Mock logging object + mock_logging = MagicMock() + + # Transform the response + result = config.transform_response( + model="bedrock/agentcore/arn:aws:bedrock-agentcore:us-west-2:123456789012:runtime/test", + raw_response=mock_response, + model_response=model_response, + logging_obj=mock_logging, + request_data={}, + messages=[{"role": "user", "content": "test"}], + optional_params={}, + litellm_params={}, + encoding=None, + ) + + # Verify ModelResponse structure + assert len(result.choices) == 1 + assert result.choices[0].message.content == "SSE response" + assert 
result.choices[0].message.role == "assistant" + assert result.choices[0].finish_reason == "stop" + + # Verify usage data from SSE metadata + assert hasattr(result, "usage") + assert result.usage.prompt_tokens == 20 + assert result.usage.completion_tokens == 10 + assert result.usage.total_tokens == 30 + From 91fa1163aa1b507e3ebc62bd043ad50ff03cf1f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B3n=20Levy?= Date: Thu, 4 Dec 2025 10:55:44 +0000 Subject: [PATCH 2/8] fix(agentcore): move CustomStreamWrapper import to module level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The deferred imports inside streaming methods caused initialization delays during health check requests, leading to timeouts in ECS deployments. - Move CustomStreamWrapper import to module-level (line 19) - Remove deferred imports from get_sync_custom_stream_wrapper (line 588) - Remove deferred import from get_async_custom_stream_wrapper (line 747) - Remove from TYPE_CHECKING block to use actual import This ensures the import happens at module load time rather than during first request processing, preventing health check endpoint blocking. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- litellm/llms/bedrock/chat/agentcore/transformation.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/litellm/llms/bedrock/chat/agentcore/transformation.py b/litellm/llms/bedrock/chat/agentcore/transformation.py index c04e416cfc8c..94e845e30958 100644 --- a/litellm/llms/bedrock/chat/agentcore/transformation.py +++ b/litellm/llms/bedrock/chat/agentcore/transformation.py @@ -16,6 +16,7 @@ from litellm.litellm_core_utils.prompt_templates.common_utils import ( convert_content_list_to_str, ) +from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException from litellm.llms.bedrock.base_aws_llm import BaseAWSLLM from litellm.llms.bedrock.common_utils import BedrockError @@ -30,14 +31,12 @@ if TYPE_CHECKING: from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler - from litellm.utils import CustomStreamWrapper LiteLLMLoggingObj = _LiteLLMLoggingObj else: LiteLLMLoggingObj = Any HTTPHandler = Any AsyncHTTPHandler = Any - CustomStreamWrapper = Any class AmazonAgentCoreConfig(BaseConfig, BaseAWSLLM): @@ -586,8 +585,6 @@ def get_sync_custom_stream_wrapper( ) # Wrap the generator in CustomStreamWrapper - from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper - return CustomStreamWrapper( completion_stream=self._stream_agentcore_response_sync(response, model), model=model, @@ -745,8 +742,6 @@ async def get_async_custom_stream_wrapper( ) # Wrap the async generator in CustomStreamWrapper - from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper - return CustomStreamWrapper( completion_stream=self._stream_agentcore_response(response, model), model=model, From 148f50d990cba11576a48d32a9704767f42c2562 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B3n=20Levy?= Date: Thu, 4 Dec 2025 12:12:54 +0000 Subject: [PATCH 3/8] test(agentcore): ensure sync response --- .../llm_translation/test_bedrock_agentcore.py | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/tests/llm_translation/test_bedrock_agentcore.py b/tests/llm_translation/test_bedrock_agentcore.py index 3ea5b9bac064..f00ceb8eab4e 100644 
--- a/tests/llm_translation/test_bedrock_agentcore.py +++ b/tests/llm_translation/test_bedrock_agentcore.py @@ -568,3 +568,77 @@ def test_agentcore_transform_response_sse(): assert result.usage.completion_tokens == 10 assert result.usage.total_tokens == 30 + +def test_agentcore_synchronous_non_streaming_response(): + """ + Test that synchronous (non-streaming) AgentCore calls still work correctly + after streaming simplification changes. + + This test verifies: + 1. Synchronous completion calls work (stream=False or no stream param) + 2. Response is properly parsed and returned as ModelResponse + 3. Content is extracted correctly + 4. Usage data is calculated when not provided by API + + This is a regression test for the streaming simplification changes + to ensure we didn't break the non-streaming code path. + """ + from litellm.llms.custom_httpx.http_handler import HTTPHandler + + litellm._turn_on_debug() + client = HTTPHandler() + + # Mock a JSON response (typical for synchronous AgentCore calls) + mock_json_response = { + "result": { + "role": "assistant", + "content": [{"text": "This is a synchronous response from AgentCore."}] + } + } + + # Create a mock response object + mock_response = Mock(spec=httpx.Response) + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = mock_json_response + + with patch.object(client, "post", return_value=mock_response) as mock_post: + # Make a synchronous (non-streaming) completion call + response = litellm.completion( + model="bedrock/agentcore/arn:aws:bedrock-agentcore:us-west-2:888602223428:runtime/hosted_agent_r9jvp-3ySZuRHjLC", + messages=[ + { + "role": "user", + "content": "Test synchronous response", + } + ], + stream=False, # Explicitly disable streaming + client=client, + ) + + # Verify the response structure + assert response is not None + assert hasattr(response, "choices") + assert len(response.choices) > 0 + + # Verify content + message = response.choices[0].message + assert message is not None + assert message.content == "This is a synchronous response from AgentCore." 
+ assert message.role == "assistant" + + # Verify completion metadata + assert response.choices[0].finish_reason == "stop" + assert response.choices[0].index == 0 + + # Verify usage data exists (either from API or calculated) + assert hasattr(response, "usage") + assert response.usage is not None + assert response.usage.prompt_tokens > 0 + assert response.usage.completion_tokens > 0 + assert response.usage.total_tokens > 0 + + print(f"Synchronous response: {response}") + print(f"Content: {message.content}") + print(f"Usage: prompt={response.usage.prompt_tokens}, completion={response.usage.completion_tokens}, total={response.usage.total_tokens}") + From b5163c3c83e6f73b818fe120dcac5c941416def5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B3n=20Levy?= Date: Thu, 4 Dec 2025 16:13:09 +0000 Subject: [PATCH 4/8] chore: upgrade boto3 to 1.40.76 in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index aa8e6fd97be4..61bb7a8d802d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ google-cloud-iam = {version = "^2.19.1", optional = true} resend = {version = ">=0.8.0", optional = true} pynacl = {version = "^1.5.0", optional = true} websockets = {version = "^15.0.1", optional = true} -boto3 = {version = "1.36.0", optional = true} +boto3 = { version = "1.40.76", optional = true } redisvl = {version = "^0.4.1", optional = true, markers = "python_version >= '3.9' and python_version < '3.14'"} mcp = {version = "^1.21.2", optional = true, python = ">=3.10"} litellm-proxy-extras = {version = "0.4.21", optional = true} From 81e1789d43279aa89cc5f758813d4dfc5566783a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B3n=20Levy?= Date: Thu, 4 Dec 2025 16:17:20 +0000 Subject: [PATCH 5/8] chore: added taplo.toml --- taplo.toml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 taplo.toml diff --git a/taplo.toml b/taplo.toml new file mode 100644 index 000000000000..e5065a696162 --- /dev/null +++ b/taplo.toml @@ -0,0 +1,23 @@ +[formatting] + +# Keep your table/key order as written (project, project.scripts, dependency-groups, tool.uv, ...) 
+reorder_keys = false + +# Force arrays to stay multiline once expanded +array_auto_expand = true +array_auto_collapse = false + +# Keep nice spacing inside arrays and inline tables +compact_arrays = false +compact_inline_tables = true + +# Don’t align `=` vertically (matches your example) +align_entries = true + +# Reasonable defaults +align_comments = true +trailing_newline = true +reorder_arrays = true +reorder_inline_tables = false +allowed_blank_lines = 2 +crlf = false From e8f7b2e697456b9da0115019e6f3318b083a716d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benedikt=20=C3=93skarsson?= Date: Thu, 8 Jan 2026 00:44:59 +0000 Subject: [PATCH 6/8] fix(bedrock): handle thinking with tool calls for Claude 4 models --- .../bedrock/chat/converse_transformation.py | 32 +- litellm/llms/bedrock/chat/invoke_handler.py | 36 +- litellm/utils.py | 62 ++- .../chat/test_converse_transformation.py | 506 +++++++++++------- 4 files changed, 407 insertions(+), 229 deletions(-) diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py index 59590e464fca..fd8c0d754730 100644 --- a/litellm/llms/bedrock/chat/converse_transformation.py +++ b/litellm/llms/bedrock/chat/converse_transformation.py @@ -53,7 +53,12 @@ PromptTokensDetailsWrapper, Usage, ) -from litellm.utils import add_dummy_tool, has_tool_call_blocks, supports_reasoning +from litellm.utils import ( + add_dummy_tool, + has_tool_call_blocks, + last_assistant_with_tool_calls_has_no_thinking_blocks, + supports_reasoning, +) from ..common_utils import ( BedrockError, @@ -773,7 +778,7 @@ def _translate_response_format_param( return optional_params """ - Follow similar approach to anthropic - translate to a single tool call. + Follow similar approach to anthropic - translate to a single tool call. When using tools in this way: - https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode - You usually want to provide a single tool @@ -1070,9 +1075,24 @@ def _transform_request_helper( llm_provider="bedrock", ) + # Drop thinking param if thinking is enabled but thinking_blocks are missing + # This prevents the error: "Expected thinking or redacted_thinking, but found tool_use" + # Related issues: https://github.com/BerriAI/litellm/issues/14194 + if ( + optional_params.get("thinking") is not None + and messages is not None + and last_assistant_with_tool_calls_has_no_thinking_blocks(messages) + ): + if litellm.modify_params: + optional_params.pop("thinking", None) + litellm.verbose_logger.warning( + "Dropping 'thinking' param because the last assistant message with tool_calls " + "has no thinking_blocks. The model won't use extended thinking for this turn." 
+ ) + # Prepare and separate parameters - inference_params, additional_request_params, request_metadata = ( - self._prepare_request_params(optional_params, model) + inference_params, additional_request_params, request_metadata = self._prepare_request_params( + optional_params, model ) original_tools = inference_params.pop("tools", []) @@ -1459,11 +1479,11 @@ def _transform_response( ) """ - Bedrock Response Object has optional message block + Bedrock Response Object has optional message block completion_response["output"].get("message", None) - A message block looks like this (Example 1): + A message block looks like this (Example 1): "output": { "message": { "role": "assistant", diff --git a/litellm/llms/bedrock/chat/invoke_handler.py b/litellm/llms/bedrock/chat/invoke_handler.py index 49292545208e..ea612da52c60 100644 --- a/litellm/llms/bedrock/chat/invoke_handler.py +++ b/litellm/llms/bedrock/chat/invoke_handler.py @@ -374,6 +374,29 @@ class BedrockLLM(BaseAWSLLM): def __init__(self) -> None: super().__init__() + @staticmethod + def is_claude_messages_api_model(model: str) -> bool: + """ + Check if the model uses the Claude Messages API (Claude 3+). + + Handles: + - Regional prefixes: eu.anthropic.claude-*, us.anthropic.claude-* + - Claude 3 models: claude-3-haiku, claude-3-sonnet, claude-3-opus, claude-3-5-*, claude-3-7-* + - Claude 4 models: claude-opus-4, claude-sonnet-4, claude-haiku-4 + """ + # Normalize model string to lowercase for matching + model_lower = model.lower() + + # Claude 3+ indicators (all use Messages API) + messages_api_indicators = [ + "claude-3", # Claude 3.x models + "claude-opus-4", # Claude Opus 4 + "claude-sonnet-4", # Claude Sonnet 4 + "claude-haiku-4", # Claude Haiku 4 + ] + + return any(indicator in model_lower for indicator in messages_api_indicators) + def convert_messages_to_prompt( self, model, messages, provider, custom_prompt_dict ) -> Tuple[str, Optional[list]]: @@ -465,7 +488,7 @@ def process_response( # noqa: PLR0915 completion_response["generations"][0]["finish_reason"] ) elif provider == "anthropic": - if model.startswith("anthropic.claude-3"): + if self.is_claude_messages_api_model(model): json_schemas: dict = {} _is_function_call = False ## Handle Tool Calling @@ -595,13 +618,12 @@ def process_response( # noqa: PLR0915 outputText = choice["message"].get("content") elif "text" in choice: # fallback for completion format outputText = choice["text"] - # Set finish reason if "finish_reason" in choice: model_response.choices[0].finish_reason = map_finish_reason( choice["finish_reason"] ) - + # Set usage if available if "usage" in completion_response: usage = completion_response["usage"] @@ -842,7 +864,7 @@ def completion( # noqa: PLR0915 ] = True # cohere requires stream = True in inference params data = json.dumps({"prompt": prompt, **inference_params}) elif provider == "anthropic": - if model.startswith("anthropic.claude-3"): + if self.is_claude_messages_api_model(model): # Separate system prompt from rest of message system_prompt_idx: list[int] = [] system_messages: list[str] = [] @@ -940,13 +962,13 @@ def completion( # noqa: PLR0915 # Use AmazonBedrockOpenAIConfig for proper OpenAI transformation openai_config = AmazonBedrockOpenAIConfig() supported_params = openai_config.get_supported_openai_params(model=model) - + # Filter to only supported OpenAI params filtered_params = { - k: v for k, v in inference_params.items() + k: v for k, v in inference_params.items() if k in supported_params } - + # OpenAI uses messages format, not prompt data = 
json.dumps({"messages": messages, **filtered_params}) else: diff --git a/litellm/utils.py b/litellm/utils.py index 7c7591cdbabf..61829f1c0004 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -619,7 +619,7 @@ def load_credentials_from_list(kwargs: dict): """ # Access CredentialAccessor via module to trigger lazy loading if needed CredentialAccessor = getattr(sys.modules[__name__], 'CredentialAccessor') - + credential_name = kwargs.get("litellm_credential_name") if credential_name and litellm.credential_list: credential_accessor = CredentialAccessor.get_credential_values(credential_name) @@ -646,7 +646,7 @@ def _is_gemini_model(model: Optional[str], custom_llm_provider: Optional[str]) - if custom_llm_provider in ["vertex_ai", "vertex_ai_beta"]: return model is not None and "gemini" in model.lower() return True - + # Check if model name contains gemini return model is not None and "gemini" in model.lower() @@ -668,7 +668,7 @@ def _process_assistant_message_tool_calls( """ role = msg_copy.get("role") tool_calls = msg_copy.get("tool_calls") - + if role == "assistant" and isinstance(tool_calls, list): new_tool_calls = [] for tc in tool_calls: @@ -681,17 +681,17 @@ def _process_assistant_message_tool_calls( else: new_tool_calls.append(tc) continue - + # Remove thought signature from ID if present if isinstance(tc_dict.get("id"), str): if thought_signature_separator in tc_dict["id"]: tc_dict["id"] = _remove_thought_signature_from_id( tc_dict["id"], thought_signature_separator ) - + new_tool_calls.append(tc_dict) msg_copy["tool_calls"] = new_tool_calls - + return msg_copy @@ -706,7 +706,7 @@ def _process_tool_message_id(msg_copy: dict, thought_signature_separator: str) - msg_copy["tool_call_id"] = _remove_thought_signature_from_id( msg_copy["tool_call_id"], thought_signature_separator ) - + return msg_copy @@ -717,7 +717,7 @@ def _remove_thought_signatures_from_messages( Remove thought signatures from tool call IDs in all messages. """ processed_messages = [] - + for msg in messages: # Handle Pydantic models (convert to dict) if hasattr(msg, "model_dump"): @@ -728,17 +728,17 @@ def _remove_thought_signatures_from_messages( # Unknown type, keep as is processed_messages.append(msg) continue - + # Process assistant messages with tool_calls msg_dict = _process_assistant_message_tool_calls( msg_dict, thought_signature_separator ) - + # Process tool messages with tool_call_id msg_dict = _process_tool_message_id(msg_dict, thought_signature_separator) - + processed_messages.append(msg_dict) - + return processed_messages @@ -958,7 +958,7 @@ def function_setup( # noqa: PLR0915 input=buffer.getvalue(), model=model, ) - + ### REMOVE THOUGHT SIGNATURES FROM TOOL CALL IDS FOR NON-GEMINI MODELS ### # Gemini models embed thought signatures in tool call IDs. 
When sending # messages with tool calls to non-Gemini providers, we need to remove these @@ -974,7 +974,7 @@ def function_setup( # noqa: PLR0915 # Get custom_llm_provider to determine target provider custom_llm_provider = kwargs.get("custom_llm_provider") - + # If custom_llm_provider not in kwargs, try to determine it from the model if not custom_llm_provider and model: try: @@ -985,18 +985,18 @@ def function_setup( # noqa: PLR0915 except Exception: # If we can't determine the provider, skip this processing pass - + # Only process if target is NOT a Gemini model if not _is_gemini_model(model, custom_llm_provider): verbose_logger.debug( "Removing thought signatures from tool call IDs for non-Gemini model" ) - + # Process messages to remove thought signatures processed_messages = _remove_thought_signatures_from_messages( messages, THOUGHT_SIGNATURE_SEPARATOR ) - + # Update messages in kwargs or args if "messages" in kwargs: kwargs["messages"] = processed_messages @@ -2976,7 +2976,7 @@ def get_optional_params_embeddings( # noqa: PLR0915 ): # Lazy load get_supported_openai_params get_supported_openai_params = getattr(sys.modules[__name__], 'get_supported_openai_params') - + # retrieve all parameters passed to the function passed_params = locals() custom_llm_provider = passed_params.pop("custom_llm_provider", None) @@ -4062,7 +4062,14 @@ def _check_valid_arg(supported_params: List[str]): ), ) elif "anthropic" in bedrock_base_model and bedrock_route == "invoke": - if bedrock_base_model.startswith("anthropic.claude-3"): + # Check for Claude 3+ models (Messages API) including regional prefixes and Claude 4 + # Models like eu.anthropic.claude-opus-4-5, us.anthropic.claude-3-5-sonnet, etc. + bedrock_base_model_lower = bedrock_base_model.lower() + is_messages_api_model = any( + indicator in bedrock_base_model_lower + for indicator in ["claude-3", "claude-opus-4", "claude-sonnet-4", "claude-haiku-4"] + ) + if is_messages_api_model: optional_params = ( litellm.AmazonAnthropicClaudeConfig().map_openai_params( non_default_params=non_default_params, @@ -6936,7 +6943,7 @@ def get_valid_models( # init litellm_params ################################# from litellm.types.router import LiteLLM_Params - + if litellm_params is None: litellm_params = LiteLLM_Params(model="") if api_key is not None: @@ -7470,7 +7477,7 @@ class ProviderConfigManager: @staticmethod def _build_provider_config_map() -> dict[LlmProviders, tuple[Callable, bool]]: """Build the provider-to-config mapping dictionary. - + Returns a dict mapping provider to (factory_function, needs_model_parameter). This avoids expensive inspect.signature() calls at runtime. """ @@ -7619,7 +7626,7 @@ def _get_bedrock_config(model: str) -> BaseConfig: def _get_cohere_config(model: str) -> BaseConfig: """Get Cohere config based on route.""" CohereModelInfo = getattr(sys.modules[__name__], 'CohereModelInfo') - route = CohereModelInfo.get_cohere_route(model) + route = CohereModelInfo.get_cohere_route(model) if route == "v2": return litellm.CohereV2ChatConfig() return litellm.CohereChatConfig() @@ -7636,7 +7643,7 @@ def get_provider_chat_config( # noqa: PLR0915 ) -> Optional[BaseConfig]: """ Returns the provider config for a given provider. - + Uses O(1) dictionary lookup for fast provider resolution. 
""" # Check JSON providers FIRST (these override standard mappings) @@ -8333,8 +8340,7 @@ def get_provider_ocr_config( from litellm.llms.vertex_ai.ocr.common_utils import get_vertex_ai_ocr_config return get_vertex_ai_ocr_config(model=model) - - MistralOCRConfig = getattr(sys.modules[__name__], 'MistralOCRConfig') +MistralOCRConfig = getattr(sys.modules[__name__], 'MistralOCRConfig') PROVIDER_TO_CONFIG_MAP = { litellm.LlmProviders.MISTRAL: MistralOCRConfig, } @@ -8771,12 +8777,12 @@ def __getattr__(name: str) -> Any: """Lazy import handler for utils module with cached registry for improved performance.""" # Use cached registry from _lazy_imports instead of importing tuples every time from litellm._lazy_imports import _get_lazy_import_registry - + registry = _get_lazy_import_registry() - + # Check if name is in registry and call the cached handler function if name in registry: handler_func = registry[name] return handler_func(name) - + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py b/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py index 692866f85529..3361245f3cf3 100644 --- a/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py +++ b/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py @@ -275,10 +275,10 @@ def test_get_supported_openai_params(): def test_get_supported_openai_params_bedrock_converse(): """ - Test that all documented bedrock converse models have the same set of supported openai params when using + Test that all documented bedrock converse models have the same set of supported openai params when using `bedrock/converse/` or `bedrock/` prefix. - Note: This test is critical for routing, if we ever remove `litellm.BEDROCK_CONVERSE_MODELS`, + Note: This test is critical for routing, if we ever remove `litellm.BEDROCK_CONVERSE_MODELS`, please update this test to read `bedrock_converse` models from the model cost map. """ for model in litellm.BEDROCK_CONVERSE_MODELS: @@ -380,7 +380,7 @@ def json(self): @property def text(self): return json.dumps(response_json) - + config = AmazonConverseConfig() model_response = ModelResponse() optional_params = { @@ -471,7 +471,7 @@ def json(self): @property def text(self): return json.dumps(response_json) - + config = AmazonConverseConfig() model_response = ModelResponse() optional_params = { @@ -525,7 +525,7 @@ def test_transform_response_with_structured_response_being_called(): "toolUseId": "tooluse_456", "name": "json_tool_call", "input": { - "Current_Temperature": 62, + "Current_Temperature": 62, "Weather_Explanation": "San Francisco typically has mild, cool weather year-round due to its coastal location and marine influence. The city is known for its fog, moderate temperatures, and relatively stable climate with little seasonal variation."}, } } @@ -550,51 +550,51 @@ def json(self): @property def text(self): return json.dumps(response_json) - + config = AmazonConverseConfig() model_response = ModelResponse() optional_params = { "json_mode": True, "tools": [ { - 'type': 'function', + 'type': 'function', 'function': { - 'name': 'get_weather', - 'description': 'Get the current weather in a given location', + 'name': 'get_weather', + 'description': 'Get the current weather in a given location', 'parameters': { - 'type': 'object', + 'type': 'object', 'properties': { 'location': { - 'type': 'string', + 'type': 'string', 'description': 'The city and state, e.g. 
San Francisco, CA' - }, + }, 'unit': { - 'type': 'string', + 'type': 'string', 'enum': ['celsius', 'fahrenheit'] } - }, + }, 'required': ['location'] } } - }, + }, { - 'type': 'function', + 'type': 'function', 'function': { - 'name': 'json_tool_call', + 'name': 'json_tool_call', 'parameters': { - '$schema': 'http://json-schema.org/draft-07/schema#', - 'type': 'object', - 'required': ['Weather_Explanation', 'Current_Temperature'], + '$schema': 'http://json-schema.org/draft-07/schema#', + 'type': 'object', + 'required': ['Weather_Explanation', 'Current_Temperature'], 'properties': { 'Weather_Explanation': { - 'type': ['string', 'null'], + 'type': ['string', 'null'], 'description': '1-2 sentences explaining the weather in the location' - }, + }, 'Current_Temperature': { - 'type': ['number', 'null'], + 'type': ['number', 'null'], 'description': 'Current temperature in the location' } - }, + }, 'additionalProperties': False } } @@ -629,36 +629,36 @@ def test_transform_response_with_structured_response_calling_tool(): response_json = { "metrics": { "latencyMs": 1148 - }, + }, "output": { - "message": + "message": { "content": [ { "text": "I\'ll check the current weather in San Francisco for you." - }, + }, { "toolUse": { "input": { "location": "San Francisco, CA", "unit": "celsius" - }, - "name": "get_weather", + }, + "name": "get_weather", "toolUseId": "tooluse_oKk__QrqSUmufMw3Q7vGaQ" } } - ], + ], "role": "assistant" } - }, - "stopReason": "tool_use", + }, + "stopReason": "tool_use", "usage": { - "cacheReadInputTokenCount": 0, - "cacheReadInputTokens": 0, - "cacheWriteInputTokenCount": 0, - "cacheWriteInputTokens": 0, - "inputTokens": 534, - "outputTokens": 69, + "cacheReadInputTokenCount": 0, + "cacheReadInputTokens": 0, + "cacheWriteInputTokenCount": 0, + "cacheWriteInputTokens": 0, + "inputTokens": 534, + "outputTokens": 69, "totalTokens": 603 } } @@ -669,51 +669,51 @@ def json(self): @property def text(self): return json.dumps(response_json) - + config = AmazonConverseConfig() model_response = ModelResponse() optional_params = { "json_mode": True, "tools": [ { - 'type': 'function', + 'type': 'function', 'function': { - 'name': 'get_weather', - 'description': 'Get the current weather in a given location', + 'name': 'get_weather', + 'description': 'Get the current weather in a given location', 'parameters': { - 'type': 'object', + 'type': 'object', 'properties': { 'location': { - 'type': 'string', + 'type': 'string', 'description': 'The city and state, e.g. 
San Francisco, CA' - }, + }, 'unit': { - 'type': 'string', + 'type': 'string', 'enum': ['celsius', 'fahrenheit'] } - }, + }, 'required': ['location'] } } - }, + }, { - 'type': 'function', + 'type': 'function', 'function': { - 'name': 'json_tool_call', + 'name': 'json_tool_call', 'parameters': { - '$schema': 'http://json-schema.org/draft-07/schema#', - 'type': 'object', - 'required': ['Weather_Explanation', 'Current_Temperature'], + '$schema': 'http://json-schema.org/draft-07/schema#', + 'type': 'object', + 'required': ['Weather_Explanation', 'Current_Temperature'], 'properties': { 'Weather_Explanation': { - 'type': ['string', 'null'], + 'type': ['string', 'null'], 'description': '1-2 sentences explaining the weather in the location' - }, + }, 'Current_Temperature': { - 'type': ['number', 'null'], + 'type': ['number', 'null'], 'description': 'Current temperature in the location' } - }, + }, 'additionalProperties': False } } @@ -743,7 +743,7 @@ def text(self): @pytest.mark.asyncio async def test_bedrock_bash_tool_acompletion(): """Test Bedrock with bash tool for ls command using acompletion.""" - + # Test with bash tool instead of computer tool tools = [ { @@ -751,14 +751,14 @@ async def test_bedrock_bash_tool_acompletion(): "name": "bash", } ] - + messages = [ { - "role": "user", + "role": "user", "content": "run ls command and find all python files" } ] - + try: response = await litellm.acompletion( model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0", @@ -771,13 +771,13 @@ async def test_bedrock_bash_tool_acompletion(): assert False, "Expected authentication error but got successful response" except Exception as e: error_str = str(e).lower() - + # Check if it's an expected authentication/credentials error auth_error_indicators = [ - "credentials", "authentication", "unauthorized", "access denied", + "credentials", "authentication", "unauthorized", "access denied", "aws", "region", "profile", "token", "invalid", "signature" ] - + if any(auth_error in error_str for auth_error in auth_error_indicators): # This is expected - request formatting succeeded, auth failed as expected assert True @@ -789,7 +789,7 @@ async def test_bedrock_bash_tool_acompletion(): @pytest.mark.asyncio async def test_bedrock_computer_use_acompletion(): """Test Bedrock computer use with acompletion function.""" - + # Test with computer use tool tools = [ { @@ -800,10 +800,10 @@ async def test_bedrock_computer_use_acompletion(): "display_number": 0, } ] - + messages = [ { - "role": "user", + "role": "user", "content": [ { "type": "text", @@ -818,7 +818,7 @@ async def test_bedrock_computer_use_acompletion(): ] } ] - + try: response = await litellm.acompletion( model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0", @@ -831,13 +831,13 @@ async def test_bedrock_computer_use_acompletion(): assert False, "Expected authentication error but got successful response" except Exception as e: error_str = str(e).lower() - + # Check if it's an expected authentication/credentials error auth_error_indicators = [ - "credentials", "authentication", "unauthorized", "access denied", + "credentials", "authentication", "unauthorized", "access denied", "aws", "region", "profile", "token", "invalid", "signature" ] - + if any(auth_error in error_str for auth_error in auth_error_indicators): # This is expected - request formatting succeeded, auth failed as expected assert True @@ -849,9 +849,9 @@ async def test_bedrock_computer_use_acompletion(): @pytest.mark.asyncio async def test_transformation_directly(): """Test the transformation 
directly to verify the request structure.""" - + config = AmazonConverseConfig() - + tools = [ { "type": "computer_20241022", @@ -865,14 +865,14 @@ async def test_transformation_directly(): "name": "bash", } ] - + messages = [ { "role": "user", "content": "run ls command and find all python files" } ] - + # Transform request request_data = config.transform_request( model="anthropic.claude-3-5-sonnet-20241022-v2:0", @@ -881,19 +881,19 @@ async def test_transformation_directly(): litellm_params={}, headers={} ) - + # Verify the structure assert "additionalModelRequestFields" in request_data additional_fields = request_data["additionalModelRequestFields"] - + # Check that anthropic_beta is set correctly for computer use assert "anthropic_beta" in additional_fields assert additional_fields["anthropic_beta"] == ["computer-use-2024-10-22"] - + # Check that tools are present assert "tools" in additional_fields assert len(additional_fields["tools"]) == 2 - + # Verify tool types tool_types = [tool.get("type") for tool in additional_fields["tools"]] assert "computer_20241022" in tool_types @@ -933,7 +933,7 @@ def test_transform_request_helper_includes_anthropic_beta_and_tools_bash(): def test_transform_request_with_multiple_tools(): """Test transformation with multiple tools including computer, bash, and function tools.""" config = AmazonConverseConfig() - + # Use the exact payload from the user's error tools = [ { @@ -974,14 +974,14 @@ def test_transform_request_with_multiple_tools(): } } ] - + messages = [ { "role": "user", "content": "run ls command and find all python files" } ] - + # Transform request request_data = config.transform_request( model="anthropic.claude-3-5-sonnet-20241022-v2:0", @@ -990,25 +990,25 @@ def test_transform_request_with_multiple_tools(): litellm_params={}, headers={} ) - + # Verify the structure assert "additionalModelRequestFields" in request_data additional_fields = request_data["additionalModelRequestFields"] - + # Check that anthropic_beta is set correctly for computer use assert "anthropic_beta" in additional_fields assert additional_fields["anthropic_beta"] == ["computer-use-2024-10-22"] - + # Check that tools are present assert "tools" in additional_fields assert len(additional_fields["tools"]) == 3 # computer, bash, text_editor tools - + # Verify tool types tool_types = [tool.get("type") for tool in additional_fields["tools"]] assert "computer_20241022" in tool_types assert "bash_20241022" in tool_types assert "text_editor_20241022" in tool_types - + # Function tools are processed separately and not included in computer use tools # They would be in toolConfig if present @@ -1016,7 +1016,7 @@ def test_transform_request_with_multiple_tools(): def test_transform_request_with_computer_tool_only(): """Test transformation with only computer tool.""" config = AmazonConverseConfig() - + tools = [ { "type": "computer_20241022", @@ -1026,10 +1026,10 @@ def test_transform_request_with_computer_tool_only(): "display_number": 0, } ] - + messages = [ { - "role": "user", + "role": "user", "content": [ { "type": "text", @@ -1044,7 +1044,7 @@ def test_transform_request_with_computer_tool_only(): ] } ] - + # Transform request request_data = config.transform_request( model="anthropic.claude-3-5-sonnet-20241022-v2:0", @@ -1053,15 +1053,15 @@ def test_transform_request_with_computer_tool_only(): litellm_params={}, headers={} ) - + # Verify the structure assert "additionalModelRequestFields" in request_data additional_fields = request_data["additionalModelRequestFields"] - + # Check 
that anthropic_beta is set correctly for computer use assert "anthropic_beta" in additional_fields assert additional_fields["anthropic_beta"] == ["computer-use-2024-10-22"] - + # Check that tools are present assert "tools" in additional_fields assert len(additional_fields["tools"]) == 1 @@ -1071,21 +1071,21 @@ def test_transform_request_with_computer_tool_only(): def test_transform_request_with_bash_tool_only(): """Test transformation with only bash tool.""" config = AmazonConverseConfig() - + tools = [ { "type": "bash_20241022", "name": "bash", } ] - + messages = [ { - "role": "user", + "role": "user", "content": "run ls command and find all python files" } ] - + # Transform request request_data = config.transform_request( model="anthropic.claude-3-5-sonnet-20241022-v2:0", @@ -1094,15 +1094,15 @@ def test_transform_request_with_bash_tool_only(): litellm_params={}, headers={} ) - + # Verify the structure assert "additionalModelRequestFields" in request_data additional_fields = request_data["additionalModelRequestFields"] - + # Check that anthropic_beta is set correctly for computer use assert "anthropic_beta" in additional_fields assert additional_fields["anthropic_beta"] == ["computer-use-2024-10-22"] - + # Check that tools are present assert "tools" in additional_fields assert len(additional_fields["tools"]) == 1 @@ -1112,21 +1112,21 @@ def test_transform_request_with_bash_tool_only(): def test_transform_request_with_text_editor_tool(): """Test transformation with text editor tool.""" config = AmazonConverseConfig() - + tools = [ { "type": "text_editor_20241022", "name": "str_replace_editor", } ] - + messages = [ { "role": "user", "content": "Edit this text file" } ] - + # Transform request request_data = config.transform_request( model="anthropic.claude-3-5-sonnet-20241022-v2:0", @@ -1135,15 +1135,15 @@ def test_transform_request_with_text_editor_tool(): litellm_params={}, headers={} ) - + # Verify the structure assert "additionalModelRequestFields" in request_data additional_fields = request_data["additionalModelRequestFields"] - + # Check that anthropic_beta is set correctly for computer use assert "anthropic_beta" in additional_fields assert additional_fields["anthropic_beta"] == ["computer-use-2024-10-22"] - + # Check that tools are present assert "tools" in additional_fields assert len(additional_fields["tools"]) == 1 @@ -1153,7 +1153,7 @@ def test_transform_request_with_text_editor_tool(): def test_transform_request_with_function_tool(): """Test transformation with function tool.""" config = AmazonConverseConfig() - + tools = [ { "type": "function", @@ -1174,14 +1174,14 @@ def test_transform_request_with_function_tool(): } } ] - + messages = [ { "role": "user", "content": "What's the weather like in San Francisco?" 
} ] - + # Transform request request_data = config.transform_request( model="anthropic.claude-3-5-sonnet-20241022-v2:0", @@ -1190,11 +1190,11 @@ def test_transform_request_with_function_tool(): litellm_params={}, headers={} ) - + # Verify the structure assert "additionalModelRequestFields" in request_data additional_fields = request_data["additionalModelRequestFields"] - + # Function tools are not computer use tools, so they don't get anthropic_beta # They are processed through the regular tool config assert "toolConfig" in request_data @@ -1206,7 +1206,7 @@ def test_transform_request_with_function_tool(): def test_map_openai_params_with_response_format(): """Test map_openai_params with response_format.""" config = AmazonConverseConfig() - + tools = [ { "type": "function", @@ -1277,12 +1277,12 @@ async def test_assistant_message_cache_control(): messages = [ {"role": "user", "content": "Hello"}, { - "role": "assistant", + "role": "assistant", "content": "Hi there!", "cache_control": {"type": "ephemeral"} } ] - + result = _bedrock_converse_messages_pt( messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", @@ -1294,7 +1294,7 @@ async def test_assistant_message_cache_control(): model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) - + assert result == async_result async_result = await BedrockConverseMessagesProcessor._bedrock_converse_messages_pt_async( @@ -1302,14 +1302,14 @@ async def test_assistant_message_cache_control(): model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) - + assert result == async_result - + # Should have user message and assistant message assert len(result) == 2 assert result[0]["role"] == "user" assert result[1]["role"] == "assistant" - + # Assistant message should have text content and cachePoint assistant_content = result[1]["content"] assert len(assistant_content) == 2 @@ -1325,7 +1325,7 @@ async def test_assistant_message_list_content_cache_control(): BedrockConverseMessagesProcessor, _bedrock_converse_messages_pt, ) - + messages = [ {"role": "user", "content": "Hello"}, { @@ -1339,7 +1339,7 @@ async def test_assistant_message_list_content_cache_control(): ] } ] - + result = _bedrock_converse_messages_pt( messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", @@ -1351,9 +1351,9 @@ async def test_assistant_message_list_content_cache_control(): model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) - + assert result == async_result - + # Assistant message should have text content and cachePoint assistant_content = result[1]["content"] assert len(assistant_content) == 2 @@ -1369,7 +1369,7 @@ async def test_tool_message_cache_control(): BedrockConverseMessagesProcessor, _bedrock_converse_messages_pt, ) - + messages = [ {"role": "user", "content": "What's the weather?"}, { @@ -1395,7 +1395,7 @@ async def test_tool_message_cache_control(): ] } ] - + result = _bedrock_converse_messages_pt( messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", @@ -1407,20 +1407,20 @@ async def test_tool_message_cache_control(): model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) - + assert result == async_result - + # Should have user, assistant, and user (tool results) messages assert len(result) == 3 - + # Last message should contain tool result and cachePoint tool_message_content = result[2]["content"] assert len(tool_message_content) == 2 - + # First should be 
tool result assert "toolResult" in tool_message_content[0] assert tool_message_content[0]["toolResult"]["content"][0]["text"] == "Weather data: sunny, 25°C" - + # Second should be cachePoint assert "cachePoint" in tool_message_content[1] assert tool_message_content[1]["cachePoint"]["type"] == "default" @@ -1433,7 +1433,7 @@ async def test_tool_message_string_content_cache_control(): BedrockConverseMessagesProcessor, _bedrock_converse_messages_pt, ) - + messages = [ {"role": "user", "content": "What's the weather?"}, { @@ -1442,7 +1442,7 @@ async def test_tool_message_string_content_cache_control(): "tool_calls": [ { "id": "call_123", - "type": "function", + "type": "function", "function": {"name": "get_weather", "arguments": "{}"} } ] @@ -1454,7 +1454,7 @@ async def test_tool_message_string_content_cache_control(): "cache_control": {"type": "ephemeral"} } ] - + result = _bedrock_converse_messages_pt( messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", @@ -1466,17 +1466,17 @@ async def test_tool_message_string_content_cache_control(): model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) - + assert result == async_result - + # Last message should contain tool result and cachePoint tool_message_content = result[2]["content"] assert len(tool_message_content) == 2 - + # First should be tool result assert "toolResult" in tool_message_content[0] assert tool_message_content[0]["toolResult"]["content"][0]["text"] == "Weather: sunny, 25°C" - + # Second should be cachePoint assert "cachePoint" in tool_message_content[1] assert tool_message_content[1]["cachePoint"]["type"] == "default" @@ -1489,7 +1489,7 @@ async def test_assistant_tool_calls_cache_control(): BedrockConverseMessagesProcessor, _bedrock_converse_messages_pt, ) - + messages = [ {"role": "user", "content": "Calculate 2+2"}, { @@ -1505,7 +1505,7 @@ async def test_assistant_tool_calls_cache_control(): ] } ] - + result = _bedrock_converse_messages_pt( messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", @@ -1517,18 +1517,18 @@ async def test_assistant_tool_calls_cache_control(): model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) - + assert result == async_result - + # Assistant message should have tool use and cachePoint assistant_content = result[1]["content"] assert len(assistant_content) == 2 - + # First should be tool use assert "toolUse" in assistant_content[0] assert assistant_content[0]["toolUse"]["name"] == "calc" assert assistant_content[0]["toolUse"]["toolUseId"] == "call_proxy_123" - + # Second should be cachePoint assert "cachePoint" in assistant_content[1] assert assistant_content[1]["cachePoint"]["type"] == "default" @@ -1541,7 +1541,7 @@ async def test_multiple_tool_calls_with_mixed_cache_control(): BedrockConverseMessagesProcessor, _bedrock_converse_messages_pt, ) - + messages = [ {"role": "user", "content": "Do multiple calculations"}, { @@ -1563,7 +1563,7 @@ async def test_multiple_tool_calls_with_mixed_cache_control(): ] } ] - + result = _bedrock_converse_messages_pt( messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", @@ -1575,21 +1575,21 @@ async def test_multiple_tool_calls_with_mixed_cache_control(): model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) - + assert result == async_result - + # Assistant message should have: toolUse1, cachePoint, toolUse2 assistant_content = result[1]["content"] assert len(assistant_content) 
== 3 - + # First tool use with cache assert "toolUse" in assistant_content[0] assert assistant_content[0]["toolUse"]["toolUseId"] == "call_1" - + # Cache point for first tool assert "cachePoint" in assistant_content[1] assert assistant_content[1]["cachePoint"]["type"] == "default" - + # Second tool use without cache assert "toolUse" in assistant_content[2] assert assistant_content[2]["toolUse"]["toolUseId"] == "call_2" @@ -1602,7 +1602,7 @@ async def test_no_cache_control_no_cache_point(): BedrockConverseMessagesProcessor, _bedrock_converse_messages_pt, ) - + messages = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi there!"}, # No cache_control @@ -1612,7 +1612,7 @@ async def test_no_cache_control_no_cache_point(): "content": "Tool result" # No cache_control } ] - + result = _bedrock_converse_messages_pt( messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", @@ -1624,14 +1624,14 @@ async def test_no_cache_control_no_cache_point(): model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) - + assert result == async_result - + # Assistant message should only have text content, no cachePoint assistant_content = result[1]["content"] assert len(assistant_content) == 1 assert assistant_content[0]["text"] == "Hi there!" - + # Tool message should only have tool result, no cachePoint tool_content = result[2]["content"] assert len(tool_content) == 1 @@ -1867,11 +1867,11 @@ def test_guarded_text_with_tool_calls(): # First should be regular text assert "text" in content[0] assert content[0]["text"] == "What's the weather?" - + # Second should be guardContent assert "guardContent" in content[1] assert content[1]["guardContent"]["text"]["text"] == "Please be careful with sensitive information" - + # Other messages should not have guardContent for i in range(1, 3): content = result[i]["content"] @@ -2115,7 +2115,7 @@ def test_auto_convert_in_full_transformation(): # Verify the transformation worked assert "messages" in result assert len(result["messages"]) == 1 - + # The message should have guardContent message = result["messages"][0] assert "content" in message @@ -2626,79 +2626,79 @@ def test_empty_assistant_message_handling(): {"role": "assistant", "content": ""}, # Empty content {"role": "user", "content": "How are you?"} ] - + # Enable modify_params to prevent consecutive user message merging original_modify_params = litellm.modify_params litellm.modify_params = True - + try: result = _bedrock_converse_messages_pt( messages=messages, model="anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) - + # Should have 3 messages: user, assistant (with placeholder), user assert len(result) == 3 assert result[0]["role"] == "user" assert result[1]["role"] == "assistant" assert result[2]["role"] == "user" - + # Assistant message should have placeholder text instead of empty content assert len(result[1]["content"]) == 1 assert result[1]["content"][0]["text"] == "Please continue." - + # Test case 2: Whitespace-only content messages = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": " "}, # Whitespace-only content {"role": "user", "content": "How are you?"} ] - + result = _bedrock_converse_messages_pt( messages=messages, model="anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) - + # Assistant message should have placeholder text instead of whitespace assert len(result[1]["content"]) == 1 assert result[1]["content"][0]["text"] == "Please continue." 
- + # Test case 3: Empty list content messages = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": [{"type": "text", "text": ""}]}, # Empty text in list {"role": "user", "content": "How are you?"} ] - + result = _bedrock_converse_messages_pt( messages=messages, model="anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) - + # Assistant message should have placeholder text instead of empty text assert len(result[1]["content"]) == 1 assert result[1]["content"][0]["text"] == "Please continue." - + # Test case 4: Normal content should not be affected messages = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "I'm doing well, thank you!"}, # Normal content {"role": "user", "content": "How are you?"} ] - + result = _bedrock_converse_messages_pt( messages=messages, model="anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) - + # Assistant message should keep original content assert len(result[1]["content"]) == 1 assert result[1]["content"][0]["text"] == "I'm doing well, thank you!" - + finally: # Restore original modify_params setting litellm.modify_params = original_modify_params @@ -2707,54 +2707,184 @@ def test_empty_assistant_message_handling(): def test_is_nova_lite_2_model(): """Test the _is_nova_lite_2_model() method for detecting Nova 2 models.""" config = AmazonConverseConfig() - + # Test with amazon.nova-2-lite-v1:0 assert config._is_nova_lite_2_model("amazon.nova-2-lite-v1:0") is True - + # Test with regional variants assert config._is_nova_lite_2_model("us.amazon.nova-2-lite-v1:0") is True assert config._is_nova_lite_2_model("eu.amazon.nova-2-lite-v1:0") is True assert config._is_nova_lite_2_model("apac.amazon.nova-2-lite-v1:0") is True - + # Test with other Nova 2 variants (pro, micro) assert config._is_nova_lite_2_model("amazon.nova-pro-1-5-v1:0") is False assert config._is_nova_lite_2_model("amazon.nova-micro-1-5-v1:0") is False assert config._is_nova_lite_2_model("us.amazon.nova-pro-1-5-v1:0") is False assert config._is_nova_lite_2_model("eu.amazon.nova-micro-1-5-v1:0") is False - + # Test with non-Nova-1.5 lite models (should return False) assert config._is_nova_lite_2_model("amazon.nova-lite-v1:0") is False assert config._is_nova_lite_2_model("amazon.nova-pro-v1:0") is False assert config._is_nova_lite_2_model("amazon.nova-micro-v1:0") is False - + # Test with Nova v1:0 models (should return False) assert config._is_nova_lite_2_model("us.amazon.nova-lite-v1:0") is False assert config._is_nova_lite_2_model("eu.amazon.nova-pro-v1:0") is False - + # Test with completely different models (should return False) assert config._is_nova_lite_2_model("anthropic.claude-3-5-sonnet-20240620-v1:0") is False assert config._is_nova_lite_2_model("meta.llama3-70b-instruct-v1:0") is False assert config._is_nova_lite_2_model("mistral.mistral-7b-instruct-v0:2") is False +def test_drop_thinking_param_when_thinking_blocks_missing(): + """ + Test that thinking param is dropped when modify_params=True and + thinking_blocks are missing from assistant message with tool_calls. 
+ + This prevents the Anthropic/Bedrock error: + "Expected thinking or redacted_thinking, but found tool_use" + + Related issue: https://github.com/BerriAI/litellm/issues/14194 + """ + from litellm.utils import last_assistant_with_tool_calls_has_no_thinking_blocks + + # Save original modify_params setting + original_modify_params = litellm.modify_params + + try: + # Test case 1: thinking should be dropped when modify_params=True + # and assistant message has tool_calls but no thinking_blocks + litellm.modify_params = True + + messages_without_thinking_blocks = [ + {"role": "user", "content": "Search for weather"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_123", + "type": "function", + "function": {"name": "search", "arguments": "{}"}, + } + ], + # No thinking_blocks - simulates OpenAI-compatible client + }, + {"role": "tool", "content": "Weather is sunny", "tool_call_id": "call_123"}, + ] + + optional_params = {"thinking": {"type": "enabled", "budget_tokens": 1000}} + + # Verify the condition is detected + assert last_assistant_with_tool_calls_has_no_thinking_blocks( + messages_without_thinking_blocks + ), "Should detect missing thinking_blocks" + + # Simulate what _transform_request_helper does + if ( + optional_params.get("thinking") is not None + and messages_without_thinking_blocks is not None + and last_assistant_with_tool_calls_has_no_thinking_blocks( + messages_without_thinking_blocks + ) + ): + if litellm.modify_params: + optional_params.pop("thinking", None) + + assert "thinking" not in optional_params, ( + "thinking param should be dropped when modify_params=True " + "and thinking_blocks are missing" + ) + + # Test case 2: thinking should NOT be dropped when thinking_blocks are present + messages_with_thinking_blocks = [ + {"role": "user", "content": "Search for weather"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_123", + "type": "function", + "function": {"name": "search", "arguments": "{}"}, + } + ], + "thinking_blocks": [ + {"type": "thinking", "thinking": "Let me search for weather..."} + ], + }, + {"role": "tool", "content": "Weather is sunny", "tool_call_id": "call_123"}, + ] + + optional_params_with_thinking = { + "thinking": {"type": "enabled", "budget_tokens": 1000} + } + + # Verify the condition is NOT detected when thinking_blocks are present + assert not last_assistant_with_tool_calls_has_no_thinking_blocks( + messages_with_thinking_blocks + ), "Should NOT detect missing thinking_blocks when they are present" + + # Simulate what _transform_request_helper does + if ( + optional_params_with_thinking.get("thinking") is not None + and messages_with_thinking_blocks is not None + and last_assistant_with_tool_calls_has_no_thinking_blocks( + messages_with_thinking_blocks + ) + ): + if litellm.modify_params: + optional_params_with_thinking.pop("thinking", None) + + assert "thinking" in optional_params_with_thinking, ( + "thinking param should NOT be dropped when thinking_blocks are present" + ) + + # Test case 3: thinking should NOT be dropped when modify_params=False + litellm.modify_params = False + + optional_params_no_modify = { + "thinking": {"type": "enabled", "budget_tokens": 1000} + } + + # Simulate what _transform_request_helper does + if ( + optional_params_no_modify.get("thinking") is not None + and messages_without_thinking_blocks is not None + and last_assistant_with_tool_calls_has_no_thinking_blocks( + messages_without_thinking_blocks + ) + ): + if litellm.modify_params: + 
optional_params_no_modify.pop("thinking", None) + + assert "thinking" in optional_params_no_modify, ( + "thinking param should NOT be dropped when modify_params=False" + ) + + finally: + # Restore original modify_params setting + litellm.modify_params = original_modify_params + def test_thinking_with_max_completion_tokens(): """Test that thinking respects max_completion_tokens parameter.""" config = AmazonConverseConfig() - + # Test case 1: max_completion_tokens is specified - should NOT set maxTokens automatically non_default_params_with_max_completion = { "thinking": {"type": "enabled", "budget_tokens": 5000}, "max_completion_tokens": 10000, } optional_params = {} - + result = config.map_openai_params( non_default_params=non_default_params_with_max_completion, optional_params=optional_params, model="us.anthropic.claude-3-7-sonnet-20250219-v1:0", drop_params=False, ) - + # Should have maxTokens set to max_completion_tokens value assert "maxTokens" in result assert result["maxTokens"] == 10000 @@ -2762,21 +2892,21 @@ def test_thinking_with_max_completion_tokens(): assert "thinking" in result assert result["thinking"]["type"] == "enabled" assert result["thinking"]["budget_tokens"] == 5000 - + # Test case 2: max_tokens is specified - should NOT set maxTokens automatically non_default_params_with_max_tokens = { "thinking": {"type": "enabled", "budget_tokens": 5000}, "max_tokens": 8000, } optional_params = {} - + result = config.map_openai_params( non_default_params=non_default_params_with_max_tokens, optional_params=optional_params, model="us.anthropic.claude-3-7-sonnet-20250219-v1:0", drop_params=False, ) - + # Should have maxTokens set to max_tokens value assert "maxTokens" in result assert result["maxTokens"] == 8000 @@ -2784,22 +2914,22 @@ def test_thinking_with_max_completion_tokens(): assert "thinking" in result assert result["thinking"]["type"] == "enabled" assert result["thinking"]["budget_tokens"] == 5000 - + # Test case 3: Neither max_tokens nor max_completion_tokens specified - should set maxTokens automatically from litellm.constants import DEFAULT_MAX_TOKENS - + non_default_params_without_max = { "thinking": {"type": "enabled", "budget_tokens": 5000}, } optional_params = {} - + result = config.map_openai_params( non_default_params=non_default_params_without_max, optional_params=optional_params, model="us.anthropic.claude-3-7-sonnet-20250219-v1:0", drop_params=False, ) - + # Should have maxTokens set to budget_tokens + DEFAULT_MAX_TOKENS assert "maxTokens" in result assert result["maxTokens"] == 5000 + DEFAULT_MAX_TOKENS From afc7378261b3118cccd9ab308c0472708f49b0ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benedikt=20=C3=93skarsson?= Date: Thu, 8 Jan 2026 02:04:52 +0000 Subject: [PATCH 7/8] chore: fix formatting error --- litellm/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/utils.py b/litellm/utils.py index 61829f1c0004..9cb5b41aacbd 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -8340,7 +8340,8 @@ def get_provider_ocr_config( from litellm.llms.vertex_ai.ocr.common_utils import get_vertex_ai_ocr_config return get_vertex_ai_ocr_config(model=model) -MistralOCRConfig = getattr(sys.modules[__name__], 'MistralOCRConfig') + + MistralOCRConfig = getattr(sys.modules[__name__], 'MistralOCRConfig') PROVIDER_TO_CONFIG_MAP = { litellm.LlmProviders.MISTRAL: MistralOCRConfig, } From 143d5682665c9198be5a1723bcde0303695d030e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benedikt=20=C3=93skarsson?= Date: Tue, 13 Jan 2026 14:23:43 +0000 Subject: [PATCH 
8/8] fix: indent error

---
 litellm/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index 9cb5b41aacbd..1ea0e4387b58 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -7626,7 +7626,7 @@ def _get_bedrock_config(model: str) -> BaseConfig:
 def _get_cohere_config(model: str) -> BaseConfig:
     """Get Cohere config based on route."""
     CohereModelInfo = getattr(sys.modules[__name__], 'CohereModelInfo')
-        route = CohereModelInfo.get_cohere_route(model)
+    route = CohereModelInfo.get_cohere_route(model)
     if route == "v2":
         return litellm.CohereV2ChatConfig()
     return litellm.CohereChatConfig()
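
Both fixups above (patches 7/8 and 8/8) adjust lines that rely on the same lazy, by-name lookup: getattr(sys.modules[__name__], 'SomeName') resolves a module-level attribute at call time rather than import time. A minimal sketch of that pattern follows; the DEFAULT_ROUTE name and resolve_default_route helper are invented here purely for illustration and are not part of litellm.

import sys

DEFAULT_ROUTE = "v1"


def resolve_default_route() -> str:
    # Look the name up on this module when the function runs, so the
    # attribute only needs to exist by call time, not at import time.
    return getattr(sys.modules[__name__], "DEFAULT_ROUTE")


print(resolve_default_route())  # -> "v1"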
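
For reference, the behaviour asserted by test_drop_thinking_param_when_thinking_blocks_missing (added earlier in this series) reduces to the guard sketched below. The helper import mirrors the test; wrapping the check in a standalone _maybe_drop_thinking function is an assumption made here for readability, since the test simulates the same logic inline as done in _transform_request_helper.

import litellm
from litellm.utils import last_assistant_with_tool_calls_has_no_thinking_blocks


def _maybe_drop_thinking(optional_params: dict, messages: list) -> dict:
    """Drop the `thinking` param when the last assistant tool-call turn has
    no thinking_blocks and litellm.modify_params is enabled, avoiding the
    "Expected thinking or redacted_thinking, but found tool_use" error."""
    if (
        optional_params.get("thinking") is not None
        and messages is not None
        and last_assistant_with_tool_calls_has_no_thinking_blocks(messages)
        and litellm.modify_params
    ):
        optional_params.pop("thinking", None)
    return optional_params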