From 7f4b761c45a96462b0b7a510bb6c9d4d5f852b64 Mon Sep 17 00:00:00 2001
From: YuHuang0525
Date: Sat, 14 Mar 2026 11:55:38 -0700
Subject: [PATCH] fix: use max_completion_tokens for gpt-4.1+, gpt-5.x, and
 o-series models

Newer OpenAI models (gpt-4.1+, gpt-5.x, o1, o3, o4) reject the legacy
max_tokens parameter with HTTP 400 and require max_completion_tokens
instead. _chat_compat() now detects the model prefix at call time and
sends the correct parameter, while all other providers (Gemini, local)
continue using max_tokens unchanged.
---
 CHANGELOG.md          |  7 +++++++
 src/applypilot/llm.py | 11 ++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5682b270..4ab3ee08 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to ApplyPilot will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Fixed
+- **OpenAI newer models (gpt-4.1+, gpt-5.x, o-series) rejected with HTTP 400** — these models
+  require `max_completion_tokens` instead of the legacy `max_tokens` parameter. `_chat_compat()`
+  now detects the model prefix and sends the correct parameter automatically.
+
 ## [0.2.0] - 2026-02-17
 
 ### Added
diff --git a/src/applypilot/llm.py b/src/applypilot/llm.py
index 1fb7be64..2d9146db 100644
--- a/src/applypilot/llm.py
+++ b/src/applypilot/llm.py
@@ -157,11 +157,20 @@ def _chat_compat(
         if self.api_key:
             headers["Authorization"] = f"Bearer {self.api_key}"
 
+        # Newer OpenAI models (gpt-4.1+, gpt-5.x, o-series) require
+        # max_completion_tokens instead of the legacy max_tokens parameter.
+        # Sending max_tokens to these models returns HTTP 400.
+        _new_param_models = ("gpt-4.1", "gpt-5", "o1", "o3", "o4")
+        if any(self.model.startswith(p) for p in _new_param_models):
+            token_param: dict[str, int] = {"max_completion_tokens": max_tokens}
+        else:
+            token_param = {"max_tokens": max_tokens}
+
         payload = {
             "model": self.model,
             "messages": messages,
             "temperature": temperature,
-            "max_tokens": max_tokens,
+            **token_param,
         }
 
         resp = self._client.post(