diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go
index 0a4477f7..4f3dec56 100644
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -60,10 +60,20 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)
 
+	// Inject reasoning_effort for Gemini 3 model variants
+	payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
-	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
+	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), false)
+	if hasOverride && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
+		basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
+	}
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
@@ -195,10 +205,20 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)
 
+	// Inject reasoning_effort for Gemini 3 model variants
+	payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
-	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
+	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), true)
+	if hasOverride && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
+		basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
+	}
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
@@ -759,6 +779,27 @@ func newGeminiStatusErr(statusCode int, body []byte) statusErr {
 	return err
 }
 
+// injectGemini3ReasoningEffort injects reasoning_effort for Gemini 3 model variants.
+// Checks metadata first (from normalized model), then falls back to model name suffix.
+func injectGemini3ReasoningEffort(modelName string, body []byte, metadata map[string]any) []byte {
+	if gjson.GetBytes(body, "reasoning_effort").Exists() {
+		return body
+	}
+	// First check metadata (set during model normalization)
+	if effort, ok := util.Gemini3ReasoningEffortFromMetadata(metadata); ok {
+		body, _ = sjson.SetBytes(body, "reasoning_effort", effort)
+		return body
+	}
+	// Fall back to model name suffix check
+	switch {
+	case strings.HasSuffix(modelName, "-low"):
+		body, _ = sjson.SetBytes(body, "reasoning_effort", "low")
+	case strings.HasSuffix(modelName, "-high"):
+		body, _ = sjson.SetBytes(body, "reasoning_effort", "high")
+	}
+	return body
+}
+
 // parseRetryDelay extracts the retry delay from a Google API 429 error response.
 // The error response contains a RetryInfo.retryDelay field in the format "0.847655010s".
 // Returns the parsed duration or an error if it cannot be determined.
diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go
index fc7b8e19..08f45f18 100644
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -75,11 +75,20 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)
 
+	// Inject reasoning_effort for Gemini 3 model variants
+	payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
+
 	// Official Gemini API via API key or OAuth bearer
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), false)
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
+		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
+	}
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -165,10 +174,19 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)
 
+	// Inject reasoning_effort for Gemini 3 model variants
+	payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), true)
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
+		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
+	}
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
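Both executors above follow the same pattern: inject reasoning_effort into the incoming payload before translation, then apply any thinking overrides carried in request metadata. A minimal sketch of the injection step only (illustrative, not part of the diff; the request body and the literal "high" value are assumed examples), using the same gjson/sjson calls the new helper relies on:

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

func main() {
	// Hypothetical source-format payload; real payloads come from the proxied client.
	body := []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`)

	// Mirrors injectGemini3ReasoningEffort: respect an explicit reasoning_effort,
	// otherwise inject the effort derived from metadata or the -low/-high suffix.
	if !gjson.GetBytes(body, "reasoning_effort").Exists() {
		body, _ = sjson.SetBytes(body, "reasoning_effort", "high")
	}
	fmt.Println(string(body))
	// {"contents":[{"role":"user","parts":[{"text":"hi"}]}],"reasoning_effort":"high"}
}

In the diff itself the effort value comes from Gemini3ReasoningEffortFromMetadata or the model-name suffix rather than a literal. The util changes that produce that metadata follow.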
diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go
index 14077fa0..2b23e3b0 100644
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -73,13 +73,29 @@ func ParseGeminiThinkingSuffix(model string) (string, *int, *bool, bool) {
 }
 
 func NormalizeGeminiThinkingModel(modelName string) (string, map[string]any) {
-	baseModel, budget, include, matched := ParseGeminiThinkingSuffix(modelName)
+	// First try Gemini 3 reasoning effort normalization (-low/-high suffix)
+	workingModel := modelName
+	var metadata map[string]any
+
+	if base, effort, matched := ParseGemini3ReasoningEffortSuffix(modelName); matched {
+		workingModel = base
+		metadata = map[string]any{
+			Gemini3OriginalModelMetadataKey:   modelName,
+			Gemini3ReasoningEffortMetadataKey: effort,
+		}
+	}
+
+	// Then try thinking suffix normalization on the (possibly already normalized) model
+	baseModel, budget, include, matched := ParseGeminiThinkingSuffix(workingModel)
 	if !matched {
-		return baseModel, nil
+		return workingModel, metadata
 	}
-	metadata := map[string]any{
-		GeminiOriginalModelMetadataKey: modelName,
+
+	// Merge thinking metadata
+	if metadata == nil {
+		metadata = map[string]any{}
 	}
+	metadata[GeminiOriginalModelMetadataKey] = modelName
 	if budget != nil {
 		metadata[GeminiThinkingBudgetMetadataKey] = *budget
 	}
@@ -268,3 +284,55 @@ func ConvertThinkingLevelToBudget(body []byte) []byte {
 	}
 	return updated
 }
+
+const (
+	Gemini3ReasoningEffortMetadataKey = "gemini3_reasoning_effort"
+	Gemini3OriginalModelMetadataKey   = "gemini3_original_model"
+)
+
+// ParseGemini3ReasoningEffortSuffix parses -low or -high suffix from Gemini 3 model names.
+// Returns the base model, reasoning effort ("low" or "high"), and whether a match was found.
+func ParseGemini3ReasoningEffortSuffix(model string) (baseModel string, reasoningEffort string, matched bool) {
+	if model == "" {
+		return model, "", false
+	}
+	lower := strings.ToLower(model)
+	if !strings.HasPrefix(lower, "gemini-3-") {
+		return model, "", false
+	}
+
+	if strings.HasSuffix(lower, "-low") {
+		return model[:len(model)-len("-low")], "low", true
+	}
+	if strings.HasSuffix(lower, "-high") {
+		return model[:len(model)-len("-high")], "high", true
+	}
+	return model, "", false
+}
+
+// NormalizeGemini3ReasoningEffortModel normalizes a Gemini 3 model with -low/-high suffix.
+// Returns the base model name and metadata containing the reasoning effort.
+func NormalizeGemini3ReasoningEffortModel(modelName string) (string, map[string]any) {
+	baseModel, effort, matched := ParseGemini3ReasoningEffortSuffix(modelName)
+	if !matched {
+		return modelName, nil
+	}
+	metadata := map[string]any{
+		Gemini3OriginalModelMetadataKey:   modelName,
+		Gemini3ReasoningEffortMetadataKey: effort,
+	}
+	return baseModel, metadata
+}
+
+// Gemini3ReasoningEffortFromMetadata extracts reasoning effort from metadata.
+func Gemini3ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
+	if len(metadata) == 0 {
+		return "", false
+	}
+	if effort, ok := metadata[Gemini3ReasoningEffortMetadataKey]; ok {
+		if s, ok := effort.(string); ok && s != "" {
+			return s, true
+		}
+	}
+	return "", false
+}
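For reference, a minimal table-driven test sketch for the new suffix handling (illustrative only, not part of the diff; the model names are example strings and the test name is hypothetical). It exercises the behaviour the util changes define: only "gemini-3-" prefixed names are normalized, and the original name plus effort land in metadata for injectGemini3ReasoningEffort to pick up later:

package util

import "testing"

// Illustrative sketch of the suffix parsing added in this diff.
func TestParseGemini3ReasoningEffortSuffixSketch(t *testing.T) {
	cases := []struct {
		in         string
		wantBase   string
		wantEffort string
		wantMatch  bool
	}{
		{"gemini-3-pro-preview-high", "gemini-3-pro-preview", "high", true},
		{"gemini-3-pro-preview-low", "gemini-3-pro-preview", "low", true},
		{"gemini-3-pro-preview", "gemini-3-pro-preview", "", false}, // no suffix, untouched
		{"gemini-2.5-pro-low", "gemini-2.5-pro-low", "", false},     // not a Gemini 3 model
	}
	for _, c := range cases {
		base, effort, matched := ParseGemini3ReasoningEffortSuffix(c.in)
		if base != c.wantBase || effort != c.wantEffort || matched != c.wantMatch {
			t.Errorf("ParseGemini3ReasoningEffortSuffix(%q) = (%q, %q, %v), want (%q, %q, %v)",
				c.in, base, effort, matched, c.wantBase, c.wantEffort, c.wantMatch)
		}
	}

	// NormalizeGemini3ReasoningEffortModel wraps the parser and emits the metadata
	// consumed later by Gemini3ReasoningEffortFromMetadata.
	base, md := NormalizeGemini3ReasoningEffortModel("gemini-3-pro-preview-high")
	if base != "gemini-3-pro-preview" {
		t.Errorf("unexpected base model %q", base)
	}
	if effort, ok := Gemini3ReasoningEffortFromMetadata(md); !ok || effort != "high" {
		t.Errorf("unexpected effort %q (ok=%v)", effort, ok)
	}
}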