From 7f4b761c45a96462b0b7a510bb6c9d4d5f852b64 Mon Sep 17 00:00:00 2001
From: YuHuang0525
Date: Sat, 14 Mar 2026 11:55:38 -0700
Subject: [PATCH] fix: use max_completion_tokens for gpt-4.1+, gpt-5.x, and
 o-series models

Newer OpenAI models (gpt-4.1+, gpt-5.x, o1, o3, o4) reject the legacy
max_tokens parameter with HTTP 400 and require max_completion_tokens
instead. _chat_compat() now detects the model prefix at call time and
sends the correct parameter, while all other providers (Gemini, local)
continue using max_tokens unchanged.
---
 CHANGELOG.md          |  7 +++++++
 src/applypilot/llm.py | 11 ++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5682b270..4ab3ee08 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to ApplyPilot will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Fixed
+- **OpenAI newer models (gpt-4.1+, gpt-5.x, o-series) rejected with HTTP 400** — these models
+  require `max_completion_tokens` instead of the legacy `max_tokens` parameter. `_chat_compat()`
+  now detects the model prefix and sends the correct parameter automatically.
+
 ## [0.2.0] - 2026-02-17
 
 ### Added
diff --git a/src/applypilot/llm.py b/src/applypilot/llm.py
index 1fb7be64..2d9146db 100644
--- a/src/applypilot/llm.py
+++ b/src/applypilot/llm.py
@@ -157,11 +157,20 @@ def _chat_compat(
         if self.api_key:
             headers["Authorization"] = f"Bearer {self.api_key}"
 
+        # Newer OpenAI models (gpt-4.1+, gpt-5.x, o-series) require
+        # max_completion_tokens instead of the legacy max_tokens parameter.
+        # Sending max_tokens to these models returns HTTP 400.
+        _new_param_models = ("gpt-4.1", "gpt-5", "o1", "o3", "o4")
+        if any(self.model.startswith(p) for p in _new_param_models):
+            token_param: dict[str, int] = {"max_completion_tokens": max_tokens}
+        else:
+            token_param = {"max_tokens": max_tokens}
+
         payload = {
             "model": self.model,
             "messages": messages,
             "temperature": temperature,
-            "max_tokens": max_tokens,
+            **token_param,
         }
 
         resp = self._client.post(