Skip to content

Commit 23919f3

Browse files
nestharus and claude committed
feat(gemini): add Gemini 3 reasoning effort suffix support (-low/-high)
Add support for the gemini-3-pro-preview-low and gemini-3-pro-preview-high model variants via suffix parsing and normalization.

Changes:
- Add ParseGemini3ReasoningEffortSuffix to parse -low/-high suffixes
- Add Gemini3ReasoningEffortFromMetadata to read effort from metadata
- Update NormalizeGeminiThinkingModel to handle reasoning effort first
- Add injectGemini3ReasoningEffort to inject reasoning_effort into payload
- Add IsGemini3Model utility function

The -low and -high suffixes:
1. Are normalized to the base model for routing
2. Have their reasoning effort stored in metadata
3. Cause the executor to inject reasoning_effort into the payload

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 39621a0 commit 23919f3

File tree

3 files changed

+109
-8
lines changed

3 files changed

+109
-8
lines changed

internal/runtime/executor/gemini_cli_executor.go

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,13 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
6060
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
6161
defer reporter.trackFailure(ctx, &err)
6262

63+
// Inject reasoning_effort for Gemini 3 model variants
64+
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
65+
6366
from := opts.SourceFormat
6467
to := sdktranslator.FromString("gemini-cli")
6568
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
66-
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
69+
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), false)
6770
if hasOverride && util.ModelSupportsThinking(req.Model) {
6871
if budgetOverride != nil {
6972
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
@@ -202,10 +205,13 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
202205
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
203206
defer reporter.trackFailure(ctx, &err)
204207

208+
// Inject reasoning_effort for Gemini 3 model variants
209+
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
210+
205211
from := opts.SourceFormat
206212
to := sdktranslator.FromString("gemini-cli")
207213
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
208-
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
214+
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), true)
209215
if hasOverride && util.ModelSupportsThinking(req.Model) {
210216
if budgetOverride != nil {
211217
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
@@ -780,6 +786,27 @@ func newGeminiStatusErr(statusCode int, body []byte) statusErr {
780786
return err
781787
}
782788

789+
// injectGemini3ReasoningEffort injects reasoning_effort for Gemini 3 model variants.
790+
// Checks metadata first (from normalized model), then falls back to model name suffix.
791+
func injectGemini3ReasoningEffort(modelName string, body []byte, metadata map[string]any) []byte {
792+
if gjson.GetBytes(body, "reasoning_effort").Exists() {
793+
return body
794+
}
795+
// First check metadata (set during model normalization)
796+
if effort, ok := util.Gemini3ReasoningEffortFromMetadata(metadata); ok {
797+
body, _ = sjson.SetBytes(body, "reasoning_effort", effort)
798+
return body
799+
}
800+
// Fall back to model name suffix check
801+
switch {
802+
case strings.HasSuffix(modelName, "-low"):
803+
body, _ = sjson.SetBytes(body, "reasoning_effort", "low")
804+
case strings.HasSuffix(modelName, "-high"):
805+
body, _ = sjson.SetBytes(body, "reasoning_effort", "high")
806+
}
807+
return body
808+
}
809+
783810
// parseRetryDelay extracts the retry delay from a Google API 429 error response.
784811
// The error response contains a RetryInfo.retryDelay field in the format "0.847655010s".
785812
// Returns the parsed duration or an error if it cannot be determined.

internal/runtime/executor/gemini_executor.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,13 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
7575
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
7676
defer reporter.trackFailure(ctx, &err)
7777

78+
// Inject reasoning_effort for Gemini 3 model variants
79+
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
80+
7881
// Official Gemini API via API key or OAuth bearer
7982
from := opts.SourceFormat
8083
to := sdktranslator.FromString("gemini")
81-
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
84+
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), false)
8285
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
8386
if budgetOverride != nil {
8487
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
@@ -171,9 +174,12 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
171174
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
172175
defer reporter.trackFailure(ctx, &err)
173176

177+
// Inject reasoning_effort for Gemini 3 model variants
178+
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
179+
174180
from := opts.SourceFormat
175181
to := sdktranslator.FromString("gemini")
176-
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
182+
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), true)
177183
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
178184
if budgetOverride != nil {
179185
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)

internal/util/gemini_thinking.go

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,29 @@ func ParseGeminiThinkingSuffix(model string) (string, *int, *bool, bool) {
6464
}
6565

6666
func NormalizeGeminiThinkingModel(modelName string) (string, map[string]any) {
67-
baseModel, budget, include, matched := ParseGeminiThinkingSuffix(modelName)
67+
// First try Gemini 3 reasoning effort normalization (-low/-high suffix)
68+
workingModel := modelName
69+
var metadata map[string]any
70+
71+
if base, effort, matched := ParseGemini3ReasoningEffortSuffix(modelName); matched {
72+
workingModel = base
73+
metadata = map[string]any{
74+
Gemini3OriginalModelMetadataKey: modelName,
75+
Gemini3ReasoningEffortMetadataKey: effort,
76+
}
77+
}
78+
79+
// Then try thinking suffix normalization on the (possibly already normalized) model
80+
baseModel, budget, include, matched := ParseGeminiThinkingSuffix(workingModel)
6881
if !matched {
69-
return baseModel, nil
82+
return workingModel, metadata
7083
}
71-
metadata := map[string]any{
72-
GeminiOriginalModelMetadataKey: modelName,
84+
85+
// Merge thinking metadata
86+
if metadata == nil {
87+
metadata = map[string]any{}
7388
}
89+
metadata[GeminiOriginalModelMetadataKey] = modelName
7490
if budget != nil {
7591
metadata[GeminiThinkingBudgetMetadataKey] = *budget
7692
}
@@ -259,3 +275,55 @@ func ConvertThinkingLevelToBudget(body []byte) []byte {
259275
}
260276
return updated
261277
}
278+
279+
// Metadata keys populated when a Gemini 3 model name with a -low/-high suffix
// is normalized to its base model.
const (
280+
// Gemini3ReasoningEffortMetadataKey stores the parsed reasoning effort
// ("low" or "high").
Gemini3ReasoningEffortMetadataKey = "gemini3_reasoning_effort"
281+
// Gemini3OriginalModelMetadataKey stores the model name as it was requested,
// before the suffix was stripped.
Gemini3OriginalModelMetadataKey = "gemini3_original_model"
282+
)
283+
284+
// ParseGemini3ReasoningEffortSuffix parses a trailing "-low" or "-high"
// reasoning-effort suffix from a Gemini 3 model name.
//
// A name is considered only when it starts with "gemini-3-". Prefix and suffix
// are both matched case-insensitively. On a match it returns the model with the
// suffix removed (original casing preserved), the effort as lowercase "low" or
// "high", and matched == true. Otherwise it returns model unchanged, an empty
// effort, and matched == false.
//
// Matching is done on byte windows of the original string rather than on a
// lowercased copy: lowercasing can change a string's byte length (for example
// U+0130 lowers to ASCII "i"), which would make a suffix offset computed from
// the copy cut the original at the wrong position.
func ParseGemini3ReasoningEffortSuffix(model string) (baseModel string, reasoningEffort string, matched bool) {
	const prefix = "gemini-3-"
	if len(model) < len(prefix) || !strings.EqualFold(model[:len(prefix)], prefix) {
		return model, "", false
	}

	for _, effort := range [...]string{"low", "high"} {
		suffix := "-" + effort
		cut := len(model) - len(suffix)
		if cut >= 0 && strings.EqualFold(model[cut:], suffix) {
			return model[:cut], effort, true
		}
	}
	return model, "", false
}
303+
304+
// NormalizeGemini3ReasoningEffortModel normalizes a Gemini 3 model with -low/-high suffix.
305+
// Returns the base model name and metadata containing the reasoning effort.
306+
func NormalizeGemini3ReasoningEffortModel(modelName string) (string, map[string]any) {
307+
baseModel, effort, matched := ParseGemini3ReasoningEffortSuffix(modelName)
308+
if !matched {
309+
return modelName, nil
310+
}
311+
metadata := map[string]any{
312+
Gemini3OriginalModelMetadataKey: modelName,
313+
Gemini3ReasoningEffortMetadataKey: effort,
314+
}
315+
return baseModel, metadata
316+
}
317+
318+
// Gemini3ReasoningEffortFromMetadata extracts reasoning effort from metadata.
319+
func Gemini3ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
320+
if len(metadata) == 0 {
321+
return "", false
322+
}
323+
if effort, ok := metadata[Gemini3ReasoningEffortMetadataKey]; ok {
324+
if s, ok := effort.(string); ok && s != "" {
325+
return s, true
326+
}
327+
}
328+
return "", false
329+
}

0 commit comments

Comments
 (0)