From 09bb99411a52eb7a4fd613d5123def176027cde4 Mon Sep 17 00:00:00 2001
From: nmarasoiu <dumitru.nicolae.marasoiu@outlook.com>
Date: Wed, 10 Dec 2025 10:33:32 +0000
Subject: [PATCH] feat(hooks): add thinking_budget hook for extended thinking
 management

Add a new hook that manages Claude's extended thinking budget_tokens parameter:

- Inject default budget_tokens when thinking is enabled but budget is missing
- Override budget_tokens when below configurable minimum threshold
- Ensure budget < max_tokens (API constraint)
- Optionally inject thinking configuration for thinking-capable models

Key design decisions:
- Trust caller: if request has thinking, adjust budget regardless of model
- Model filter only applies to inject_if_missing mode
- Anthropic-specific: non-Anthropic providers will ignore thinking field
- Simple config via hook params (no env var complexity)

Configuration:
- budget_default: Default budget (10000)
- budget_min: Minimum threshold (1024)
- inject_if_missing: Auto-inject thinking (false)
- log_modifications: Log changes (true)
---
 README.md            |  35 +++-
 src/ccproxy/hooks.py | 117 +++++++++++++
 tests/test_hooks.py  | 406 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 556 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index b46fcee..5bc10cc 100644
--- a/README.md
+++ b/README.md
@@ -232,9 +232,40 @@ general_settings:
 
 See [docs/configuration.md](docs/configuration.md) for more information on how to customize your Claude Code experience using `ccproxy`.
 
-<!-- ## Extended Thinking -->
+## Extended Thinking Budget Hook
 
-<!-- Normally, when you send a message, Claude Code does a simple keyword scan for words/phrases like "think deeply" to determine whether or not to enable thinking, as well the size of the thinking token budget. [Simply including the word "ultrathink](https://claudelog.com/mechanics/ultrathink-plus-plus/) sets the thinking token budget to the maximum of `31999`. -->
+`ccproxy` includes a `thinking_budget` hook that manages Claude's extended thinking `budget_tokens` parameter:
+
+- Injects default `budget_tokens` when thinking is enabled but budget is missing
+- Replaces `budget_tokens` with the default budget when it is below a configurable minimum
+- Ensures `budget_tokens < max_tokens` (API constraint)
+- Optionally injects a thinking configuration for thinking-capable models when the request has none
+
+Note: `thinking` is Anthropic-specific. Non-Anthropic providers will ignore it.
+
+### Configuration
+
+```yaml
+ccproxy:
+  hooks:
+    - ccproxy.hooks.rule_evaluator
+    - ccproxy.hooks.model_router
+    - ccproxy.hooks.forward_oauth
+    - ccproxy.hooks.thinking_budget  # Simple form - uses defaults
+
+    # OR with custom parameters:
+    # - hook: ccproxy.hooks.thinking_budget
+    #   params:
+    #     budget_default: 16000      # Default budget (default: 10000)
+    #     budget_min: 4000           # Minimum threshold (default: 1024)
+    #     inject_if_missing: false   # Inject thinking if absent (default: false)
+    #     log_modifications: true    # Log changes (default: true)
+```
+
+### Behavior
+
+- **Existing thinking**: If request has `thinking`, adjusts budget as needed (trusts caller on model choice)
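+- **inject_if_missing**: Only injects thinking for Claude 3.7+ and Claude 4 models, and only when `max_tokens` (if set) is above 1024; the injected budget is capped at `max_tokens - 1`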
 
 ## Routing Rules
 
diff --git a/src/ccproxy/hooks.py b/src/ccproxy/hooks.py
index 5515365..b849088 100644
--- a/src/ccproxy/hooks.py
+++ b/src/ccproxy/hooks.py
@@ -13,6 +13,17 @@
 # Set up structured logging
 logger = logging.getLogger(__name__)
 
+# Minimum budget_tokens allowed by the Anthropic API for extended thinking
+API_MIN_BUDGET_TOKENS = 1024
+
+# Regex patterns, applied with re.search to the lowercased model name, for models that
+# support extended thinking. Thinking is Anthropic-specific; non-Anthropic providers will ignore it
+THINKING_CAPABLE_PATTERNS = [
+    r"claude-3-7",        # Claude 3.7 (e.g., claude-3-7-sonnet-20250219)
+    r"claude-[a-z]+-4",   # Claude 4.x (e.g., claude-sonnet-4-*, claude-opus-4-*)
+    r"claude-4",          # Future claude-4-* models
+]
+
 # Global storage for request metadata, keyed by litellm_call_id
 # Required because LiteLLM doesn't preserve custom metadata from async_pre_call_hook
 # to logging callbacks - only internal fields like user_id and hidden_params survive.
@@ -429,3 +440,109 @@ def forward_apikey(data: dict[str, Any], user_api_key_dict: dict[str, Any], **kw
         )
 
     return data
+
+
+def _is_thinking_capable_model(model: str | None) -> bool:
+    """Return True when the lowercased model name matches any THINKING_CAPABLE_PATTERNS regex."""
+    if model:
+        name = model.lower()
+        return any(re.search(rx, name) is not None for rx in THINKING_CAPABLE_PATTERNS)
+    return False
+
+
+def thinking_budget(data: dict[str, Any], user_api_key_dict: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
+    """Manage thinking budget_tokens for Anthropic extended thinking requests.
+
+    Adjusts budget_tokens when thinking is enabled:
+    - Injects default budget when missing
+    - Replaces a budget below the minimum threshold with the default budget
+    - Ensures budget < max_tokens (API constraint)
+
+    Optionally injects thinking config when not present (for thinking-capable models only).
+
+    Note: thinking is Anthropic-specific. Non-Anthropic providers will ignore it.
+
+    Args:
+        data: Request data from LiteLLM (mutated in place)
+        user_api_key_dict: User API key dictionary (not used by this hook)
+        **kwargs: Hook parameters:
+            - budget_default: Default budget (default: 10000; raised to budget_min if lower)
+            - budget_min: Minimum threshold (default: 1024; raised to the API minimum if lower)
+            - inject_if_missing: Inject thinking if absent (default: False)
+            - log_modifications: Log changes (default: True)
+
+    Returns:
+        The same ``data`` dict, with ``thinking`` (and possibly ``temperature``) adjusted.
+    """
+    budget_default = kwargs.get("budget_default", 10000)
+    budget_min = kwargs.get("budget_min", API_MIN_BUDGET_TOKENS)
+    inject_if_missing = kwargs.get("inject_if_missing", False)
+    log_modifications = kwargs.get("log_modifications", True)
+
+    # Respect the API floor; keep default >= min so an override can never lower a budget
+    budget_min = max(budget_min, API_MIN_BUDGET_TOKENS)
+    budget_default = max(budget_default, budget_min)
+
+    # Get fields from request (top-level keys take precedence over the proxied body)
+    body = data.get("proxy_server_request", {}).get("body", {})
+    thinking = data.get("thinking") or body.get("thinking")
+    model = data.get("model") or body.get("model")
+    max_tokens = data.get("max_tokens") or body.get("max_tokens")
+
+    reasons: list[str] = []  # one entry per change made; non-empty means "modified"
+
+    if thinking is not None:
+        # Request has thinking - adjust budget if needed (trust caller on model choice)
+        if isinstance(thinking, dict) and thinking.get("type") == "enabled":
+            current_budget = thinking.get("budget_tokens")
+
+            if current_budget is None:
+                thinking["budget_tokens"] = budget_default
+                reasons.append(f"injected budget_tokens={budget_default}")
+            elif current_budget < budget_min:
+                thinking["budget_tokens"] = budget_default
+                reasons.append(f"increased budget_tokens {current_budget} -> {budget_default} (min: {budget_min})")
+
+            # Ensure budget < max_tokens
+            if max_tokens is not None and thinking.get("budget_tokens", 0) >= max_tokens:
+                if max_tokens <= API_MIN_BUDGET_TOKENS:
+                    logger.warning(f"max_tokens={max_tokens} too low for thinking (min budget: {API_MIN_BUDGET_TOKENS})")
+                else:
+                    thinking["budget_tokens"] = max_tokens - 1
+                    reasons.append(f"capped at {max_tokens - 1} (max_tokens constraint)")
+
+            data["thinking"] = thinking
+
+    elif inject_if_missing:
+        # No thinking present - inject only for thinking-capable models
+        if not _is_thinking_capable_model(model):
+            logger.debug(f"Skipping thinking injection for model: {model}")
+            return data
+
+        if max_tokens is not None and max_tokens <= API_MIN_BUDGET_TOKENS:
+            logger.debug(f"Skipping thinking injection: max_tokens={max_tokens} too low")
+            return data
+
+        budget = budget_default
+        if max_tokens is not None and budget >= max_tokens:
+            budget = max_tokens - 1
+
+        data["thinking"] = {"type": "enabled", "budget_tokens": budget}
+        reasons.append(f"injected thinking with budget_tokens={budget}")
+
+        # Thinking requires temperature=1; test "is None" explicitly so temperature=0 is still reset
+        current_temp = data.get("temperature")
+        if current_temp is None:
+            current_temp = body.get("temperature")
+        if current_temp is not None and current_temp != 1:
+            data["temperature"] = 1
+            reasons.append("set temperature=1")
+
+    if reasons and log_modifications:
+        reason = "; ".join(reasons)
+        logger.info(
+            f"Adjusted thinking budget for {model}: {reason}",
+            extra={"event": "thinking_budget_modified", "model": model, "reason": reason},
+        )
+
+    return data
diff --git a/tests/test_hooks.py b/tests/test_hooks.py
index dbc58da..773c669 100644
--- a/tests/test_hooks.py
+++ b/tests/test_hooks.py
@@ -9,12 +9,16 @@
 from ccproxy.classifier import RequestClassifier
 from ccproxy.config import clear_config_instance
 from ccproxy.hooks import (
+    API_MIN_BUDGET_TOKENS,
+    THINKING_CAPABLE_PATTERNS,
+    _is_thinking_capable_model,
     capture_headers,
     extract_session_id,
     forward_apikey,
     forward_oauth,
     model_router,
     rule_evaluator,
+    thinking_budget,
 )
 from ccproxy.router import ModelRouter, clear_router
 
@@ -1258,3 +1262,405 @@ def test_extract_session_id_preserves_existing_trace_metadata(self, user_api_key
         assert trace_meta["existing_trace_key"] == "existing_trace_value"
         assert trace_meta["claude_user_hash"] == "hash123"
         assert trace_meta["claude_account_id"] == "acct456"
+
+
+class TestIsThinkingCapableModel:
+    """Test which model names _is_thinking_capable_model accepts and rejects."""
+
+    def test_claude_3_7_sonnet(self):
+        """Test claude-3-7-sonnet is recognized."""
+        assert _is_thinking_capable_model("claude-3-7-sonnet-20250219") is True
+
+    def test_claude_4_models(self):
+        """Test names with the version before the family (claude-4-*) are recognized."""
+        assert _is_thinking_capable_model("claude-4-opus-20250101") is True
+        assert _is_thinking_capable_model("claude-4-sonnet-20250101") is True
+
+    def test_claude_sonnet_4(self):
+        """Test claude-sonnet-4 models (with and without a minor version) are recognized."""
+        assert _is_thinking_capable_model("claude-sonnet-4-5-20250929") is True
+        assert _is_thinking_capable_model("claude-sonnet-4-20250514") is True
+
+    def test_claude_opus_4(self):
+        """Test claude-opus-4 models (with and without a minor version) are recognized."""
+        assert _is_thinking_capable_model("claude-opus-4-5-20251101") is True
+        assert _is_thinking_capable_model("claude-opus-4-20250514") is True
+
+    def test_claude_haiku_4(self):
+        """Test claude-haiku-4 models are recognized."""
+        assert _is_thinking_capable_model("claude-haiku-4-5-20251001") is True
+
+    def test_older_models_not_capable(self):
+        """Test Claude 3 and 3.5 models are not recognized as thinking-capable."""
+        assert _is_thinking_capable_model("claude-3-5-sonnet-20241022") is False
+        assert _is_thinking_capable_model("claude-3-5-haiku-20241022") is False
+        assert _is_thinking_capable_model("claude-3-opus-20240229") is False
+
+    def test_non_claude_models(self):
+        """Test non-Claude models are not recognized."""
+        assert _is_thinking_capable_model("gpt-4") is False
+        assert _is_thinking_capable_model("gemini-pro") is False
+
+    def test_none_model(self):
+        """Test None model returns False."""
+        assert _is_thinking_capable_model(None) is False
+
+    def test_empty_model(self):
+        """Test empty string model returns False."""
+        assert _is_thinking_capable_model("") is False
+
+    def test_case_insensitive(self):
+        """Test model matching is case-insensitive (upper and mixed case)."""
+        assert _is_thinking_capable_model("CLAUDE-SONNET-4-5-20250929") is True
+        assert _is_thinking_capable_model("Claude-Opus-4-5-20251101") is True
+
+
+class TestThinkingBudgetHookBasic:
+    """Test that thinking_budget leaves requests alone when no adjustment is needed."""
+
+    def test_no_modification_without_thinking(self, user_api_key_dict):
+        """Test hook doesn't add thinking to a request without a thinking field."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "messages": [{"role": "user", "content": "test message"}],
+            "max_tokens": 16000,
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        assert "thinking" not in result
+
+    def test_preserves_existing_thinking(self, user_api_key_dict):
+        """Test hook keeps a budget that is above the minimum and below max_tokens."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "messages": [{"role": "user", "content": "test message"}],
+            "max_tokens": 16000,
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 5000,
+            },
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        assert result["thinking"]["type"] == "enabled"
+        assert result["thinking"]["budget_tokens"] == 5000
+
+    def test_returns_data_dict(self, user_api_key_dict):
+        """Test hook returns a dict."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "messages": [{"role": "user", "content": "test message"}],
+            "max_tokens": 16000,
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        assert isinstance(result, dict)
+
+
+class TestThinkingBudgetHookModification:
+    """Test how thinking_budget fills in or replaces budget_tokens on enabled thinking."""
+
+    def test_inject_missing_budget_tokens(self, user_api_key_dict):
+        """Test hook injects the default budget_tokens (10000) when missing."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 16000,
+            "thinking": {
+                "type": "enabled",
+            },
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        assert result["thinking"]["budget_tokens"] == 10000
+
+    def test_override_low_budget(self, user_api_key_dict):
+        """Test hook replaces a budget below the minimum with the default budget."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 16000,
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 500,
+            },
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        assert result["thinking"]["budget_tokens"] == 10000
+
+    def test_custom_budget_min(self, user_api_key_dict):
+        """Test a budget below a custom budget_min is replaced by the custom budget_default."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 16000,
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 3000,
+            },
+        }
+        result = thinking_budget(data, user_api_key_dict, budget_min=5000, budget_default=8000)
+        assert result["thinking"]["budget_tokens"] == 8000
+
+    def test_respects_budget_above_min(self, user_api_key_dict):
+        """Test hook doesn't modify a budget that is above the minimum."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 16000,
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 8000,
+            },
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        assert result["thinking"]["budget_tokens"] == 8000
+
+
+class TestThinkingBudgetHookMaxTokensConstraint:
+    """Test that budget_tokens is kept strictly below max_tokens."""
+
+    def test_adjust_budget_when_exceeds_max_tokens(self, user_api_key_dict):
+        """Test hook caps the budget at max_tokens - 1 when it exceeds max_tokens."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 5000,
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 6000,
+            },
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        assert result["thinking"]["budget_tokens"] == 4999
+
+    def test_adjust_budget_equals_max_tokens(self, user_api_key_dict):
+        """Test hook caps the budget at max_tokens - 1 when it equals max_tokens."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 5000,
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 5000,
+            },
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        assert result["thinking"]["budget_tokens"] == 4999
+
+    def test_no_adjustment_when_budget_below_max_tokens(self, user_api_key_dict):
+        """Test hook doesn't adjust when budget is properly below max_tokens."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 10000,
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 5000,
+            },
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        assert result["thinking"]["budget_tokens"] == 5000
+
+    def test_max_tokens_too_low_warning(self, user_api_key_dict, caplog):
+        """Test a warning is logged when max_tokens is at or below the minimum budget."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 500,
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 2000,
+            },
+        }
+        with caplog.at_level(logging.WARNING):
+            thinking_budget(data, user_api_key_dict)
+        assert "max_tokens=500 too low for thinking" in caplog.text
+
+
+class TestThinkingBudgetHookInjection:
+    """Test inject_if_missing behavior for requests that carry no thinking field."""
+
+    def test_inject_thinking_when_missing(self, user_api_key_dict):
+        """Test hook injects enabled thinking with the default budget when configured to."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 16000,
+            "messages": [{"role": "user", "content": "test"}],
+        }
+        result = thinking_budget(data, user_api_key_dict, inject_if_missing=True)
+        assert "thinking" in result
+        assert result["thinking"]["type"] == "enabled"
+        assert result["thinking"]["budget_tokens"] == 10000
+
+    def test_no_inject_by_default(self, user_api_key_dict):
+        """Test hook doesn't inject thinking when inject_if_missing is left at its default."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "messages": [{"role": "user", "content": "test message"}],
+            "max_tokens": 16000,
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        assert "thinking" not in result
+
+    def test_inject_adjusts_for_max_tokens(self, user_api_key_dict):
+        """Test the injected budget is capped at max_tokens - 1."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 5000,
+            "messages": [{"role": "user", "content": "test"}],
+        }
+        result = thinking_budget(data, user_api_key_dict, inject_if_missing=True, budget_default=10000)
+        assert result["thinking"]["budget_tokens"] == 4999
+
+    def test_inject_skipped_when_max_tokens_too_low(self, user_api_key_dict, caplog):
+        """Test injection is skipped (and logged at DEBUG) when max_tokens is too low."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 500,
+            "messages": [{"role": "user", "content": "test"}],
+        }
+        with caplog.at_level(logging.DEBUG):
+            result = thinking_budget(data, user_api_key_dict, inject_if_missing=True)
+        assert "thinking" not in result
+        assert "Skipping thinking injection" in caplog.text
+
+    def test_inject_sets_temperature(self, user_api_key_dict):
+        """Test injecting thinking resets a request temperature of 0.7 to 1."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 16000,
+            "temperature": 0.7,
+            "messages": [{"role": "user", "content": "test"}],
+        }
+        result = thinking_budget(data, user_api_key_dict, inject_if_missing=True)
+        assert result["temperature"] == 1
+
+
+class TestThinkingBudgetHookModelFilter:
+    """Test model filtering behavior (only applies to inject_if_missing)."""
+
+    def test_existing_thinking_processed_regardless_of_model(self, user_api_key_dict):
+        """Test hook adjusts existing thinking even for non-thinking models (trust caller)."""
+        data = {
+            "model": "claude-3-5-sonnet-20241022",  # Not matched by the thinking-capable patterns
+            "max_tokens": 16000,
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 500,  # Below default min
+            },
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        # Should still adjust budget - caller explicitly enabled thinking
+        assert result["thinking"]["budget_tokens"] == 10000
+
+    def test_inject_skipped_for_non_thinking_model(self, user_api_key_dict, caplog):
+        """Test inject_if_missing is skipped (and logged at DEBUG) for non-thinking-capable models."""
+        data = {
+            "model": "claude-3-5-sonnet-20241022",  # Not matched by the thinking-capable patterns
+            "max_tokens": 16000,
+            "messages": [{"role": "user", "content": "test"}],
+        }
+        with caplog.at_level(logging.DEBUG):
+            result = thinking_budget(data, user_api_key_dict, inject_if_missing=True)
+        assert "thinking" not in result
+        assert "Skipping thinking injection for model" in caplog.text
+
+    def test_inject_allowed_for_thinking_model(self, user_api_key_dict):
+        """Test inject_if_missing works for thinking-capable models."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 16000,
+            "messages": [{"role": "user", "content": "test"}],
+        }
+        result = thinking_budget(data, user_api_key_dict, inject_if_missing=True)
+        assert "thinking" in result
+        assert result["thinking"]["budget_tokens"] == 10000
+
+
+class TestThinkingBudgetHookLogging:
+    """Test the INFO log emitted when the hook modifies a request."""
+
+    def test_logs_modification(self, user_api_key_dict, caplog):
+        """Test hook logs the change and its reason when it modifies a request."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 16000,
+            "thinking": {
+                "type": "enabled",
+            },
+        }
+        with caplog.at_level(logging.INFO):
+            thinking_budget(data, user_api_key_dict)
+        assert "Adjusted thinking budget" in caplog.text
+        assert "injected budget_tokens" in caplog.text
+
+    def test_disable_logging(self, user_api_key_dict, caplog):
+        """Test hook doesn't log a modification when log_modifications=False."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 16000,
+            "thinking": {
+                "type": "enabled",
+            },
+        }
+        with caplog.at_level(logging.INFO):
+            thinking_budget(data, user_api_key_dict, log_modifications=False)
+        assert "Adjusted thinking budget" not in caplog.text
+
+
+class TestThinkingBudgetHookEdgeCases:
+    """Test edge cases: unusual thinking values, missing fields and a None model."""
+
+    def test_thinking_type_not_enabled(self, user_api_key_dict):
+        """Test thinking with type != enabled is left untouched (no budget injected)."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 16000,
+            "thinking": {
+                "type": "disabled",
+            },
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        assert result["thinking"]["type"] == "disabled"
+        assert "budget_tokens" not in result["thinking"]
+
+    def test_thinking_not_dict(self, user_api_key_dict):
+        """Test a non-dict thinking value is passed through unchanged."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "max_tokens": 16000,
+            "thinking": "enabled",
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        assert result["thinking"] == "enabled"
+
+    def test_no_max_tokens(self, user_api_key_dict):
+        """Test a low budget is still replaced by the default when max_tokens is absent."""
+        data = {
+            "model": "claude-sonnet-4-5-20250929",
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 500,
+            },
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        assert result["thinking"]["budget_tokens"] == 10000
+
+    def test_empty_data(self, user_api_key_dict):
+        """Test empty request data is returned unchanged."""
+        data = {}
+        result = thinking_budget(data, user_api_key_dict)
+        assert result == {}
+
+    def test_none_model_with_thinking(self, user_api_key_dict):
+        """Test hook processes thinking even with None model (trust caller)."""
+        data = {
+            "model": None,
+            "max_tokens": 16000,
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 500,  # Below min
+            },
+        }
+        result = thinking_budget(data, user_api_key_dict)
+        # Should process - caller explicitly enabled thinking
+        assert result["thinking"]["budget_tokens"] == 10000
+
+    def test_none_model_inject_skipped(self, user_api_key_dict):
+        """Test inject_if_missing is skipped when the model is None."""
+        data = {
+            "model": None,
+            "max_tokens": 16000,
+        }
+        result = thinking_budget(data, user_api_key_dict, inject_if_missing=True)
+        # Should not inject - model not recognized as thinking-capable
+        assert "thinking" not in result