Skip to content

Commit 3f6220f

Browse files
nestharus and claude committed
feat(gemini): add Gemini 3 reasoning effort suffix support (-low/-high)
Add support for gemini-3-pro-preview-low and gemini-3-pro-preview-high model variants via suffix parsing and normalization.

Changes:
- Add ParseGemini3ReasoningEffortSuffix to parse -low/-high suffixes
- Add Gemini3ReasoningEffortFromMetadata to read effort from metadata
- Update NormalizeGeminiThinkingModel to handle reasoning effort first
- Add injectGemini3ReasoningEffort to inject reasoning_effort into payload
- Add IsGemini3Model utility function

The -low and -high suffixes:
1. Get normalized to base model for routing
2. Store reasoning effort in metadata
3. Executor injects reasoning_effort into payload

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 54e2411 commit 3f6220f

File tree

3 files changed

+139
-12
lines changed

3 files changed

+139
-12
lines changed

internal/runtime/executor/gemini_cli_executor.go

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,20 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
6060
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
6161
defer reporter.trackFailure(ctx, &err)
6262

63+
// Inject reasoning_effort for Gemini 3 model variants
64+
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
65+
6366
from := opts.SourceFormat
6467
to := sdktranslator.FromString("gemini-cli")
65-
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
66-
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
68+
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
69+
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), false)
70+
if hasOverride && util.ModelSupportsThinking(req.Model) {
71+
if budgetOverride != nil {
72+
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
73+
budgetOverride = &norm
74+
}
75+
basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
76+
}
6777
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
6878
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
6979
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
@@ -195,10 +205,20 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
195205
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
196206
defer reporter.trackFailure(ctx, &err)
197207

208+
// Inject reasoning_effort for Gemini 3 model variants
209+
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
210+
198211
from := opts.SourceFormat
199212
to := sdktranslator.FromString("gemini-cli")
200-
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
201-
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
213+
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
214+
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), true)
215+
if hasOverride && util.ModelSupportsThinking(req.Model) {
216+
if budgetOverride != nil {
217+
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
218+
budgetOverride = &norm
219+
}
220+
basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
221+
}
202222
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
203223
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
204224
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
@@ -759,6 +779,27 @@ func newGeminiStatusErr(statusCode int, body []byte) statusErr {
759779
return err
760780
}
761781

782+
// injectGemini3ReasoningEffort injects reasoning_effort for Gemini 3 model variants.
783+
// Checks metadata first (from normalized model), then falls back to model name suffix.
784+
func injectGemini3ReasoningEffort(modelName string, body []byte, metadata map[string]any) []byte {
785+
if gjson.GetBytes(body, "reasoning_effort").Exists() {
786+
return body
787+
}
788+
// First check metadata (set during model normalization)
789+
if effort, ok := util.Gemini3ReasoningEffortFromMetadata(metadata); ok {
790+
body, _ = sjson.SetBytes(body, "reasoning_effort", effort)
791+
return body
792+
}
793+
// Fall back to model name suffix check
794+
switch {
795+
case strings.HasSuffix(modelName, "-low"):
796+
body, _ = sjson.SetBytes(body, "reasoning_effort", "low")
797+
case strings.HasSuffix(modelName, "-high"):
798+
body, _ = sjson.SetBytes(body, "reasoning_effort", "high")
799+
}
800+
return body
801+
}
802+
762803
// parseRetryDelay extracts the retry delay from a Google API 429 error response.
763804
// The error response contains a RetryInfo.retryDelay field in the format "0.847655010s".
764805
// Returns the parsed duration or an error if it cannot be determined.

internal/runtime/executor/gemini_executor.go

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,20 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
7575
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
7676
defer reporter.trackFailure(ctx, &err)
7777

78+
// Inject reasoning_effort for Gemini 3 model variants
79+
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
80+
7881
// Official Gemini API via API key or OAuth bearer
7982
from := opts.SourceFormat
8083
to := sdktranslator.FromString("gemini")
81-
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
82-
body = applyThinkingMetadata(body, req.Metadata, req.Model)
84+
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), false)
85+
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
86+
if budgetOverride != nil {
87+
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
88+
budgetOverride = &norm
89+
}
90+
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
91+
}
8392
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
8493
body = fixGeminiImageAspectRatio(req.Model, body)
8594
body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -165,10 +174,19 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
165174
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
166175
defer reporter.trackFailure(ctx, &err)
167176

177+
// Inject reasoning_effort for Gemini 3 model variants
178+
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
179+
168180
from := opts.SourceFormat
169181
to := sdktranslator.FromString("gemini")
170-
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
171-
body = applyThinkingMetadata(body, req.Metadata, req.Model)
182+
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), true)
183+
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
184+
if budgetOverride != nil {
185+
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
186+
budgetOverride = &norm
187+
}
188+
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
189+
}
172190
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
173191
body = fixGeminiImageAspectRatio(req.Model, body)
174192
body = applyPayloadConfig(e.cfg, req.Model, body)

internal/util/gemini_thinking.go

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,29 @@ func ParseGeminiThinkingSuffix(model string) (string, *int, *bool, bool) {
7373
}
7474

7575
func NormalizeGeminiThinkingModel(modelName string) (string, map[string]any) {
76-
baseModel, budget, include, matched := ParseGeminiThinkingSuffix(modelName)
76+
// First try Gemini 3 reasoning effort normalization (-low/-high suffix)
77+
workingModel := modelName
78+
var metadata map[string]any
79+
80+
if base, effort, matched := ParseGemini3ReasoningEffortSuffix(modelName); matched {
81+
workingModel = base
82+
metadata = map[string]any{
83+
Gemini3OriginalModelMetadataKey: modelName,
84+
Gemini3ReasoningEffortMetadataKey: effort,
85+
}
86+
}
87+
88+
// Then try thinking suffix normalization on the (possibly already normalized) model
89+
baseModel, budget, include, matched := ParseGeminiThinkingSuffix(workingModel)
7790
if !matched {
78-
return baseModel, nil
91+
return workingModel, metadata
7992
}
80-
metadata := map[string]any{
81-
GeminiOriginalModelMetadataKey: modelName,
93+
94+
// Merge thinking metadata
95+
if metadata == nil {
96+
metadata = map[string]any{}
8297
}
98+
metadata[GeminiOriginalModelMetadataKey] = modelName
8399
if budget != nil {
84100
metadata[GeminiThinkingBudgetMetadataKey] = *budget
85101
}
@@ -268,3 +284,55 @@ func ConvertThinkingLevelToBudget(body []byte) []byte {
268284
}
269285
return updated
270286
}
287+
288+
// Metadata keys written when a Gemini 3 model name carrying a -low/-high
// reasoning-effort suffix is normalized to its base model.
const (
289+
// Gemini3ReasoningEffortMetadataKey maps to the parsed reasoning effort
// string ("low" or "high").
Gemini3ReasoningEffortMetadataKey = "gemini3_reasoning_effort"
290+
// Gemini3OriginalModelMetadataKey maps to the model name exactly as it was
// requested, before the suffix was stripped.
Gemini3OriginalModelMetadataKey = "gemini3_original_model"
291+
)
292+
293+
// ParseGemini3ReasoningEffortSuffix parses a -low or -high suffix from a
// Gemini 3 model name.
//
// The model must start with "gemini-3-" and end with "-low" or "-high"; both
// checks are case-insensitive. On a match it returns the model with the suffix
// removed (original casing preserved), the effort as lowercase "low" or
// "high", and true. Otherwise it returns the model unchanged, "", and false.
//
// Matching is done on same-length byte windows of the original string rather
// than on a lowercased copy: strings.ToLower can change the byte length of
// non-ASCII input, which would make offsets computed on the copy invalid for
// slicing the original.
func ParseGemini3ReasoningEffortSuffix(model string) (baseModel string, reasoningEffort string, matched bool) {
	const prefix = "gemini-3-"
	if len(model) < len(prefix) || !strings.EqualFold(model[:len(prefix)], prefix) {
		return model, "", false
	}

	for _, effort := range [...]string{"low", "high"} {
		suffix := "-" + effort
		cut := len(model) - len(suffix)
		if cut >= 0 && strings.EqualFold(model[cut:], suffix) {
			return model[:cut], effort, true
		}
	}
	return model, "", false
}
312+
313+
// NormalizeGemini3ReasoningEffortModel normalizes a Gemini 3 model with -low/-high suffix.
314+
// Returns the base model name and metadata containing the reasoning effort.
315+
func NormalizeGemini3ReasoningEffortModel(modelName string) (string, map[string]any) {
316+
baseModel, effort, matched := ParseGemini3ReasoningEffortSuffix(modelName)
317+
if !matched {
318+
return modelName, nil
319+
}
320+
metadata := map[string]any{
321+
Gemini3OriginalModelMetadataKey: modelName,
322+
Gemini3ReasoningEffortMetadataKey: effort,
323+
}
324+
return baseModel, metadata
325+
}
326+
327+
// Gemini3ReasoningEffortFromMetadata extracts reasoning effort from metadata.
328+
func Gemini3ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
329+
if len(metadata) == 0 {
330+
return "", false
331+
}
332+
if effort, ok := metadata[Gemini3ReasoningEffortMetadataKey]; ok {
333+
if s, ok := effort.(string); ok && s != "" {
334+
return s, true
335+
}
336+
}
337+
return "", false
338+
}

0 commit comments

Comments
 (0)