@@ -336,28 +336,73 @@ def translate_anthropic_request(request: AnthropicMessagesRequest) -> Dict[str,
336336 if openai_tool_choice :
337337 openai_request ["tool_choice" ] = openai_tool_choice
338338
339+ # Note: request.metadata is intentionally not mapped.
340+ # OpenAI's API doesn't have an equivalent field for client-side metadata.
341+ # The metadata is typically used by Anthropic clients for tracking purposes
342+ # and doesn't affect the model's behavior.
343+
339344 # Handle Anthropic thinking config -> reasoning_effort translation
345+ # The provider (antigravity_provider.py) applies a // 4 reduction to thinking budget
346+ # unless custom_reasoning_budget is True. This conserves thinking tokens.
347+ #
348+ # Reasoning budget thresholds map to provider budgets:
349+ # - Claude "high" = 32768 tokens (but // 4 = 8192 unless custom_reasoning_budget)
350+ # - Claude "medium" = 16384 tokens (// 4 = 4096)
351+ # - Claude "low" = 8192 tokens (// 4 = 2048)
352+ #
353+ # We only set custom_reasoning_budget=True when user explicitly requests
354+ # a large budget (32000+), indicating they want full thinking capacity.
340355 if request .thinking :
341356 if request .thinking .type == "enabled" :
342- # Map budget_tokens to reasoning_effort level
343- # Default to "medium" if enabled but budget not specified
344357 budget = request .thinking .budget_tokens or 10000
345358 if budget >= 32000 :
359+ # User explicitly wants full thinking capacity
346360 openai_request ["reasoning_effort" ] = "high"
347361 openai_request ["custom_reasoning_budget" ] = True
348362 elif budget >= 10000 :
349363 openai_request ["reasoning_effort" ] = "high"
364+ # custom_reasoning_budget defaults to False, so // 4 applies
350365 elif budget >= 5000 :
351366 openai_request ["reasoning_effort" ] = "medium"
352367 else :
353368 openai_request ["reasoning_effort" ] = "low"
354369 elif request .thinking .type == "disabled" :
355370 openai_request ["reasoning_effort" ] = "disable"
356- elif "opus" in request .model .lower ():
357- # Force high thinking for Opus models when no thinking config is provided
358- # Opus 4.5 always uses the -thinking variant, so we want maximum thinking budget
359- # Without this, the backend defaults to thinkingBudget: -1 (auto) instead of high
371+ elif _is_opus_model (request .model ):
372+ # Enable thinking for Opus models when no thinking config is provided
373+ # Use "high" effort but NOT custom_reasoning_budget, so // 4 applies
374+ # This gives 8192 thinking tokens (32768 // 4) which is reasonable for most tasks
375+ # Users who want full capacity can explicitly set thinking.budget_tokens >= 32000
360376 openai_request ["reasoning_effort" ] = "high"
361- openai_request [ " custom_reasoning_budget" ] = True
377+ # Note: NOT setting custom_reasoning_budget here to conserve tokens
362378
363379 return openai_request
380+
381+
382+ def _is_opus_model (model_name : str ) -> bool :
383+ """
384+ Check if a model name refers to a Claude Opus model.
385+
386+ Uses specific pattern matching to avoid false positives with model names
387+ that might contain "opus" as part of another word.
388+
389+ Args:
390+ model_name: The model name to check
391+
392+ Returns:
393+ True if the model is a Claude Opus model, False otherwise
394+ """
395+ import re
396+
397+ model_lower = model_name .lower ()
398+ # Match Claude Opus models specifically:
399+ # - "claude-opus-4-5", "claude-4-opus", "claude_opus"
400+ # - "opus-4", "opus-4.5", "opus4" (standalone with version)
401+ # - "antigravity/claude-opus-4-5"
402+ # Avoid matching things like "magnum-opus" or other non-Claude models
403+ opus_patterns = [
404+ r'claude[-_]?opus' , # "claude-opus", "claude_opus", "claudeopus"
405+ r'opus[-_]?\d' , # "opus-4", "opus_4", "opus4" (with version number)
406+ r'\d[-_]?opus(?:[-_]|$)' , # "4-opus", "4_opus" at word boundary
407+ ]
408+ return any (re .search (pattern , model_lower ) for pattern in opus_patterns )
0 commit comments