Open
Labels: bug
Initial Checks
- I confirm that I'm using the latest version of Pydantic AI
- I confirm that I searched for my issue in https://github.com/pydantic/pydantic-ai/issues before opening this issue
Description
Hi,
I'm using a LiteLLM proxy that forwards requests for Claude models to Google Vertex AI. However, I'm hitting errors when tool calls and thinking get into the mix:
ERROR:src.agents.excel.excel_agent_message_processor:Exception cause: BadRequestError: Error code: 400 - {'error': {'message': 'litellm.BadRequestError: Vertex_aiException BadRequestError - b\'{"type":"error","error":{"type":"invalid_request_error","message":"messages.1.content.0.type: Expected `thinking` or `redacted_thinking`, but found `text`. When `thinking` is enabled, a final `assistant` message must start with a thinking block (preceeding the lastmost set of `tool_use` and `tool_result` blocks). We recommend you include thinking blocks from previous turns. To avoid this requirement, disable `thinking`. Please consult our documentation at https://docs.claude.com/en/docs/build-with-claude/extended-thinking"},"request_id":"req_vrtx_011CTupmw9UhTioFUQDwomKa"}\'. Received Model Group=anthropic/claude\nAvailable Model Group Fallbacks=None', 'type': None, 'param': None, 'code': '400'}}

This looks similar to these issues:
- Send ThinkingPart back for OpenAIChatModel models which require reasoning content #2701
- Anthropic extended thinking: invalid request error when using tools #2040
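For reference, this is roughly the raw Anthropic Messages payload the error is asking for: with thinking enabled, the assistant turn that issued the tool call must start with the thinking block (including its signature) before the tool_use block. A sketch with placeholder values, not taken from the actual failing request:

# Sketch of the assistant turn Vertex AI expects when thinking is enabled.
# The thinking/signature values are hypothetical placeholders; real ones
# must be echoed back verbatim from the model's previous response.
assistant_turn = {
    "role": "assistant",
    "content": [
        {
            "type": "thinking",  # must precede the tool_use block
            "thinking": "The user asked about the weather in Paris...",
            "signature": "<opaque signature returned by the model>",
        },
        {
            "type": "tool_use",
            "id": "toolu_123",
            "name": "get_weather",
            "input": {"location": "Paris"},
        },
    ],
}

Pydantic AI appears to rebuild this turn with a text block first instead, which is what trips the validation.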
I'm at a bit of a loss as to what I can do here; any ideas or suggestions? I would expect Pydantic AI to send the thinking blocks back when the message history contains ThinkingParts. I've also added an example below of the plain OpenAI SDK working normally against the same endpoint.
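To confirm the ThinkingParts (and their signatures) are actually captured on the Pydantic AI side, so the problem would be purely in mapping them back onto the outgoing request, something like this sketch can walk the history; it assumes the `result` object from the repro below:

# Hedged sketch: list the ThinkingParts present in a run's message history.
from pydantic_ai.messages import ModelResponse, ThinkingPart

def dump_thinking(messages) -> None:
    for message in messages:
        if isinstance(message, ModelResponse):
            for part in message.parts:
                if isinstance(part, ThinkingPart):
                    print("thinking:", part.content[:80], "| signature:", part.signature)

# dump_thinking(result.all_messages())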
PS: @DouweM Long time no see! We worked together on Convex at Utrecht University in another lifetime :D
Example Code
Not working:
#!/usr/bin/env python3
"""
Standalone test script for a LiteLLM server with Pydantic AI.
Tests extended thinking and tool calling.
"""
import asyncio
import pprint

from pydantic import BaseModel

from pydantic_ai import Agent, ModelSettings, RunContext
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.litellm import LiteLLMProvider


# Define the weather data structure
class WeatherData(BaseModel):
    temperature: float
    conditions: str
    unit: str


# Configure the model
model = OpenAIChatModel(
    "anthropic/claude-sonnet-4-5-20250929",
    provider=LiteLLMProvider(
        api_base="<endpoint>",
        api_key="<key>",
    ),
    settings=ModelSettings(
        max_tokens=4096,
        extra_body={
            "thinking": {
                "type": "enabled",
                "budget_tokens": 1024,
            }
        },
    ),
)

# Create the agent
agent = Agent(
    model,
    system_prompt="You are a helpful weather assistant. Use the get_weather tool to retrieve weather information.",
)


# Define the tool
@agent.tool
def get_weather(ctx: RunContext, location: str, unit: str = "celsius") -> WeatherData:
    """
    Get the current weather for a given location.

    Args:
        location: The city and state, e.g. San Francisco, CA
        unit: The temperature unit to use (celsius or fahrenheit)
    """
    print("\n--- Tool Called: get_weather ---")
    print(f"Location: {location}")
    print(f"Unit: {unit}")
    # Return dummy weather data
    return WeatherData(temperature=18, conditions="partly cloudy", unit=unit)


async def main():
    try:
        # Run the agent
        result = await agent.run(
            "What's the weather like in Paris? Think through your answer carefully."
        )
        print("\n--- Messages ---")
        pprint.pp(result.all_messages())
        print("\nTest completed successfully!")
    except Exception as e:
        print(f"\nError occurred: {type(e).__name__}")
        print(f"Message: {str(e)}")
        raise


if __name__ == "__main__":
    asyncio.run(main())
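Per the error message's own suggestion, disabling thinking sidesteps the requirement entirely (at the cost of losing extended thinking). A minimal sketch, assuming the LiteLLM proxy forwards extra_body to Anthropic unchanged:

# Workaround sketch (not a fix): with thinking disabled, the "assistant message
# must start with a thinking block" rule no longer applies.
settings_no_thinking = ModelSettings(
    max_tokens=4096,
    extra_body={"thinking": {"type": "disabled"}},  # or simply omit extra_body
)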
Working with the plain OpenAI SDK:

#!/usr/bin/env python3
"""
Standalone test script for a LiteLLM server with the OpenAI SDK.
Tests extended thinking and tool calling.
"""
import pprint

from openai import OpenAI

# Configure the client for the LiteLLM server
client = OpenAI(
    base_url="<endpoint>",
    api_key="<key>",
)

# Define a test tool
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "The temperature unit to use",
                    },
                },
                "required": ["location"],
            },
        },
    }
]


def main():
    print("Testing litellm server with OpenAI SDK...")
    try:
        # Initialize messages
        messages = [
            {
                "role": "user",
                "content": "What's the weather like in Paris? Think through your answer carefully.",
            }
        ]
        # Make a request with extended thinking and tool calling
        response = client.chat.completions.create(
            model="anthropic/claude-sonnet-4-5-20250929",
            messages=messages,
            tools=tools,
            # Enable extended thinking for Claude
            extra_body={
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": 1024,
                }
            },
        )
        pprint.pp(response.choices[0].message)

        # Check for thinking content
        message = response.choices[0].message
        if hasattr(message, "content") and message.content:
            print("\n--- Content ---")
            print(message.content)

        # Add the assistant's response to the message history
        messages.append(message.model_dump(exclude_unset=True))

        # Check for tool calls
        if hasattr(message, "tool_calls") and message.tool_calls:
            print("\n--- Tool Calls ---")
            for tool_call in message.tool_calls:
                print(f"Tool: {tool_call.function.name}")
                print(f"Arguments: {tool_call.function.arguments}")

            # Respond to the tool call with a dummy value
            print("\n--- Responding to tool call with dummy data ---")
            messages.append({
                "role": "tool",
                "tool_call_id": message.tool_calls[0].id,
                "content": '{"temperature": 18, "conditions": "partly cloudy", "unit": "celsius"}',
            })
            print("SENDING MESSAGES:")
            pprint.pp(messages)

            # Get the final answer
            final_response = client.chat.completions.create(
                model="anthropic/claude",
                messages=messages,
                tools=tools,
                extra_body={
                    "thinking": {
                        "type": "enabled",
                        "budget_tokens": 1024,
                    }
                },
            )
            print("\n--- Final Answer ---")
            pprint.pp(final_response.choices[0].message)
            final_message = final_response.choices[0].message
            if hasattr(final_message, "content") and final_message.content:
                print(final_message.content)

        print("\nTest completed successfully!")
    except Exception as e:
        print(f"\nError occurred: {type(e).__name__}")
        print(f"Message: {str(e)}")
        raise


if __name__ == "__main__":
    main()
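A hedged guess at why the raw-SDK flow works: LiteLLM (as I understand its Anthropic translation) returns the thinking blocks as extra fields on the assistant message, and message.model_dump(exclude_unset=True) preserves them, so the second request echoes them back, while Pydantic AI's OpenAIChatModel appears to rebuild the assistant turn without them. Roughly the dumped shape (field names as I understand LiteLLM, values hypothetical):

# Hedged sketch of the dumped assistant message that gets echoed back; the
# extra reasoning fields below are what OpenAIChatModel seems to drop.
dumped_assistant = {
    "role": "assistant",
    "reasoning_content": "The user wants the weather in Paris...",
    "thinking_blocks": [
        {"type": "thinking", "thinking": "...", "signature": "<opaque>"},
    ],
    "tool_calls": [
        {
            "id": "toolu_123",
            "type": "function",
            "function": {"name": "get_weather", "arguments": '{"location": "Paris"}'},
        }
    ],
}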
Python, Pydantic AI & LLM client version

Python 3.12.7
pydantic-ai 1.0.15