Skip to content

Commit 4a0b6c0

Browse files
nestharus and claude committed
feat(gemini): add Gemini 3 reasoning effort suffix support (-low/-high)
Add support for gemini-3-pro-preview-low and gemini-3-pro-preview-high model variants via suffix parsing and normalization.

Changes:
- Add ParseGemini3ReasoningEffortSuffix to parse -low/-high suffixes
- Add Gemini3ReasoningEffortFromMetadata to read effort from metadata
- Update NormalizeGeminiThinkingModel to handle reasoning effort first
- Add injectGemini3ReasoningEffort to inject reasoning_effort into payload
- Add IsGemini3Model utility function

The -low and -high suffixes:
1. Get normalized to base model for routing
2. Store reasoning effort in metadata
3. Executor injects reasoning_effort into payload

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 39621a0 commit 4a0b6c0

File tree

3 files changed

+115
-8
lines changed

3 files changed

+115
-8
lines changed

internal/runtime/executor/gemini_cli_executor.go

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,13 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
6060
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
6161
defer reporter.trackFailure(ctx, &err)
6262

63+
// Inject reasoning_effort for Gemini 3 model variants
64+
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
65+
6366
from := opts.SourceFormat
6467
to := sdktranslator.FromString("gemini-cli")
6568
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
66-
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
69+
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), false)
6770
if hasOverride && util.ModelSupportsThinking(req.Model) {
6871
if budgetOverride != nil {
6972
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
@@ -202,10 +205,13 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
202205
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
203206
defer reporter.trackFailure(ctx, &err)
204207

208+
// Inject reasoning_effort for Gemini 3 model variants
209+
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
210+
205211
from := opts.SourceFormat
206212
to := sdktranslator.FromString("gemini-cli")
207213
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
208-
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
214+
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), true)
209215
if hasOverride && util.ModelSupportsThinking(req.Model) {
210216
if budgetOverride != nil {
211217
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
@@ -780,6 +786,27 @@ func newGeminiStatusErr(statusCode int, body []byte) statusErr {
780786
return err
781787
}
782788

789+
// injectGemini3ReasoningEffort injects reasoning_effort for Gemini 3 model variants.
790+
// Checks metadata first (from normalized model), then falls back to model name suffix.
791+
func injectGemini3ReasoningEffort(modelName string, body []byte, metadata map[string]any) []byte {
792+
if gjson.GetBytes(body, "reasoning_effort").Exists() {
793+
return body
794+
}
795+
// First check metadata (set during model normalization)
796+
if effort, ok := util.Gemini3ReasoningEffortFromMetadata(metadata); ok {
797+
body, _ = sjson.SetBytes(body, "reasoning_effort", effort)
798+
return body
799+
}
800+
// Fall back to model name suffix check
801+
switch {
802+
case strings.HasSuffix(modelName, "-low"):
803+
body, _ = sjson.SetBytes(body, "reasoning_effort", "low")
804+
case strings.HasSuffix(modelName, "-high"):
805+
body, _ = sjson.SetBytes(body, "reasoning_effort", "high")
806+
}
807+
return body
808+
}
809+
783810
// parseRetryDelay extracts the retry delay from a Google API 429 error response.
784811
// The error response contains a RetryInfo.retryDelay field in the format "0.847655010s".
785812
// Returns the parsed duration or an error if it cannot be determined.

internal/runtime/executor/gemini_executor.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,13 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
7575
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
7676
defer reporter.trackFailure(ctx, &err)
7777

78+
// Inject reasoning_effort for Gemini 3 model variants
79+
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
80+
7881
// Official Gemini API via API key or OAuth bearer
7982
from := opts.SourceFormat
8083
to := sdktranslator.FromString("gemini")
81-
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
84+
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), false)
8285
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
8386
if budgetOverride != nil {
8487
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
@@ -171,9 +174,12 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
171174
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
172175
defer reporter.trackFailure(ctx, &err)
173176

177+
// Inject reasoning_effort for Gemini 3 model variants
178+
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
179+
174180
from := opts.SourceFormat
175181
to := sdktranslator.FromString("gemini")
176-
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
182+
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), true)
177183
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
178184
if budgetOverride != nil {
179185
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)

internal/util/gemini_thinking.go

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,29 @@ func ParseGeminiThinkingSuffix(model string) (string, *int, *bool, bool) {
6464
}
6565

6666
func NormalizeGeminiThinkingModel(modelName string) (string, map[string]any) {
67-
baseModel, budget, include, matched := ParseGeminiThinkingSuffix(modelName)
67+
// First try Gemini 3 reasoning effort normalization (-low/-high suffix)
68+
workingModel := modelName
69+
var metadata map[string]any
70+
71+
if base, effort, matched := ParseGemini3ReasoningEffortSuffix(modelName); matched {
72+
workingModel = base
73+
metadata = map[string]any{
74+
Gemini3OriginalModelMetadataKey: modelName,
75+
Gemini3ReasoningEffortMetadataKey: effort,
76+
}
77+
}
78+
79+
// Then try thinking suffix normalization on the (possibly already normalized) model
80+
baseModel, budget, include, matched := ParseGeminiThinkingSuffix(workingModel)
6881
if !matched {
69-
return baseModel, nil
82+
return workingModel, metadata
7083
}
71-
metadata := map[string]any{
72-
GeminiOriginalModelMetadataKey: modelName,
84+
85+
// Merge thinking metadata
86+
if metadata == nil {
87+
metadata = map[string]any{}
7388
}
89+
metadata[GeminiOriginalModelMetadataKey] = modelName
7490
if budget != nil {
7591
metadata[GeminiThinkingBudgetMetadataKey] = *budget
7692
}
@@ -259,3 +275,61 @@ func ConvertThinkingLevelToBudget(body []byte) []byte {
259275
}
260276
return updated
261277
}
278+
279+
// IsGemini3Model reports whether model names a Gemini 3 model, i.e. whether it
// begins with "gemini-3-" once lower-cased. Gemini 3 models use thinkingLevel
// instead of thinkingBudget.
func IsGemini3Model(model string) bool {
	const gemini3Prefix = "gemini-3-"
	return strings.HasPrefix(strings.ToLower(model), gemini3Prefix)
}
284+
285+
// Metadata keys written when a Gemini 3 model name carrying a reasoning effort
// suffix is normalized to its base model.
const (
	// Gemini3ReasoningEffortMetadataKey is the metadata key holding the parsed
	// reasoning effort string.
	Gemini3ReasoningEffortMetadataKey = "gemini3_reasoning_effort"
	// Gemini3OriginalModelMetadataKey is the metadata key holding the model
	// name as it was before the suffix was stripped.
	Gemini3OriginalModelMetadataKey = "gemini3_original_model"
)
289+
290+
// ParseGemini3ReasoningEffortSuffix splits a trailing -low or -high reasoning
// effort suffix off a Gemini 3 model name.
//
// Matching is case-insensitive. On a match it returns the model name with the
// suffix removed (original letter case preserved), the effort as lower-case
// "low" or "high", and true. Otherwise it returns model unchanged, an empty
// effort, and false.
//
// A name only matches when it starts with "gemini-3-" and still has a
// non-empty model segment between that prefix and the suffix. Names such as
// "gemini-3-low" are therefore not treated as an effort variant, since
// stripping the suffix would leave a base that is no longer a Gemini 3 model
// name.
func ParseGemini3ReasoningEffortSuffix(model string) (baseModel string, reasoningEffort string, matched bool) {
	const familyPrefix = "gemini-3-"

	lower := strings.ToLower(model)
	if !strings.HasPrefix(lower, familyPrefix) {
		return model, "", false
	}

	for _, effort := range [...]string{"low", "high"} {
		suffix := "-" + effort
		if !strings.HasSuffix(lower, suffix) {
			continue
		}
		// The suffix must not overlap or directly abut the family prefix.
		if len(lower)-len(suffix) <= len(familyPrefix) {
			return model, "", false
		}
		return model[:len(model)-len(suffix)], effort, true
	}
	return model, "", false
}
309+
310+
// NormalizeGemini3ReasoningEffortModel normalizes a Gemini 3 model with -low/-high suffix.
311+
// Returns the base model name and metadata containing the reasoning effort.
312+
func NormalizeGemini3ReasoningEffortModel(modelName string) (string, map[string]any) {
313+
baseModel, effort, matched := ParseGemini3ReasoningEffortSuffix(modelName)
314+
if !matched {
315+
return modelName, nil
316+
}
317+
metadata := map[string]any{
318+
Gemini3OriginalModelMetadataKey: modelName,
319+
Gemini3ReasoningEffortMetadataKey: effort,
320+
}
321+
return baseModel, metadata
322+
}
323+
324+
// Gemini3ReasoningEffortFromMetadata extracts reasoning effort from metadata.
325+
func Gemini3ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
326+
if len(metadata) == 0 {
327+
return "", false
328+
}
329+
if effort, ok := metadata[Gemini3ReasoningEffortMetadataKey]; ok {
330+
if s, ok := effort.(string); ok && s != "" {
331+
return s, true
332+
}
333+
}
334+
return "", false
335+
}

0 commit comments

Comments
 (0)