49 changes: 45 additions & 4 deletions internal/runtime/executor/gemini_cli_executor.go
@@ -60,10 +60,20 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)

// Inject reasoning_effort for Gemini 3 model variants
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)

from := opts.SourceFormat
to := sdktranslator.FromString("gemini-cli")
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), false)
if hasOverride && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
}
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
@@ -195,10 +205,20 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)

// Inject reasoning_effort for Gemini 3 model variants
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)

from := opts.SourceFormat
to := sdktranslator.FromString("gemini-cli")
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), true)
if hasOverride && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
}
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
@@ -759,6 +779,27 @@ func newGeminiStatusErr(statusCode int, body []byte) statusErr {
return err
}

// injectGemini3ReasoningEffort injects reasoning_effort for Gemini 3 model variants.
// An existing reasoning_effort in the payload is left untouched; otherwise metadata (set during model normalization) is checked first, then the model name suffix.
func injectGemini3ReasoningEffort(modelName string, body []byte, metadata map[string]any) []byte {
if gjson.GetBytes(body, "reasoning_effort").Exists() {
return body
}
// First check metadata (set during model normalization)
if effort, ok := util.Gemini3ReasoningEffortFromMetadata(metadata); ok {
body, _ = sjson.SetBytes(body, "reasoning_effort", effort)
return body
}
// Fall back to model name suffix check
switch {
case strings.HasSuffix(modelName, "-low"):
body, _ = sjson.SetBytes(body, "reasoning_effort", "low")
case strings.HasSuffix(modelName, "-high"):
body, _ = sjson.SetBytes(body, "reasoning_effort", "high")
}
return body
}
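
To pin down the precedence this helper implements, here is a small test-style sketch, assuming it sits next to this file in the `executor` package. The model names are illustrative, and the metadata key literal mirrors `util.Gemini3ReasoningEffortMetadataKey` from the util changes in this PR.

```go
package executor

import (
	"testing"

	"github.com/tidwall/gjson"
)

// Sketch only: checks the documented precedence of injectGemini3ReasoningEffort.
// An explicit reasoning_effort in the payload wins, then metadata, then the suffix.
func TestInjectGemini3ReasoningEffortPrecedence(t *testing.T) {
	// Suffix fallback: no metadata, model name ends in -high.
	out := injectGemini3ReasoningEffort("gemini-3-pro-preview-high", []byte(`{}`), nil)
	if got := gjson.GetBytes(out, "reasoning_effort").String(); got != "high" {
		t.Fatalf("suffix fallback: got %q, want high", got)
	}

	// Metadata set during model normalization takes precedence over the suffix.
	meta := map[string]any{"gemini3_reasoning_effort": "low"} // util.Gemini3ReasoningEffortMetadataKey
	out = injectGemini3ReasoningEffort("gemini-3-pro-preview-high", []byte(`{}`), meta)
	if got := gjson.GetBytes(out, "reasoning_effort").String(); got != "low" {
		t.Fatalf("metadata precedence: got %q, want low", got)
	}

	// An explicit value already present in the payload is never overwritten.
	out = injectGemini3ReasoningEffort("gemini-3-pro-preview-low", []byte(`{"reasoning_effort":"high"}`), nil)
	if got := gjson.GetBytes(out, "reasoning_effort").String(); got != "high" {
		t.Fatalf("existing value: got %q, want high", got)
	}
}
```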

// parseRetryDelay extracts the retry delay from a Google API 429 error response.
// The error response contains a RetryInfo.retryDelay field in the format "0.847655010s".
// Returns the parsed duration or an error if it cannot be determined.
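
The body of parseRetryDelay is collapsed in this diff. As a hedged sketch of what the doc comment describes, not the actual implementation, the RetryInfo detail can be located in the JSON error body and its retryDelay string handed to time.ParseDuration, which accepts fractional seconds such as "0.847655010s". The helper name and error shape below are assumptions based on the google.rpc.RetryInfo convention the doc comment references.

```go
package executor

import (
	"fmt"
	"strings"
	"time"

	"github.com/tidwall/gjson"
)

// retryDelayFromBody is a hypothetical stand-in for parseRetryDelay: it scans
// error.details for a google.rpc.RetryInfo entry and parses its retryDelay string.
func retryDelayFromBody(body []byte) (time.Duration, error) {
	var raw string
	gjson.GetBytes(body, "error.details").ForEach(func(_, detail gjson.Result) bool {
		if strings.Contains(detail.Get("@type").String(), "RetryInfo") {
			raw = detail.Get("retryDelay").String() // e.g. "0.847655010s"
			return false                            // stop iterating
		}
		return true
	})
	if raw == "" {
		return 0, fmt.Errorf("no retryDelay found in error details")
	}
	return time.ParseDuration(raw) // ParseDuration handles fractional seconds
}
```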
26 changes: 22 additions & 4 deletions internal/runtime/executor/gemini_executor.go
@@ -75,11 +75,20 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)

// Inject reasoning_effort for Gemini 3 model variants
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)

// Official Gemini API via API key or OAuth bearer
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyThinkingMetadata(body, req.Metadata, req.Model)
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), false)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
}
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -165,10 +174,19 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)

// Inject reasoning_effort for Gemini 3 model variants
payload := injectGemini3ReasoningEffort(req.Model, req.Payload, req.Metadata)

from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = applyThinkingMetadata(body, req.Metadata, req.Model)
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(payload), true)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
}
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
body = applyPayloadConfig(e.cfg, req.Model, body)
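Both executors now follow the same pattern: read an optional thinking-budget / include-thoughts override from request metadata, clamp the budget for the model via NormalizeThinkingBudget, and write the result into the outgoing payload. The ApplyGeminiThinkingConfig / ApplyGeminiCLIThinkingConfig helpers are not part of this diff, so the following is only a rough sketch of the payload shape they plausibly produce, assuming the public Gemini API's generationConfig.thinkingConfig fields and a *bool include-thoughts override.

```go
package util

import "github.com/tidwall/sjson"

// applyThinkingSketch is illustrative only; the real Apply*ThinkingConfig helpers
// live outside this diff and may handle fields differently. It shows the
// generationConfig.thinkingConfig shape the Gemini API uses for thinking budgets
// and thought visibility.
func applyThinkingSketch(body []byte, budget *int, includeThoughts *bool) []byte {
	if budget != nil {
		body, _ = sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", *budget)
	}
	if includeThoughts != nil {
		body, _ = sjson.SetBytes(body, "generationConfig.thinkingConfig.includeThoughts", *includeThoughts)
	}
	return body
}
```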
76 changes: 72 additions & 4 deletions internal/util/gemini_thinking.go
@@ -73,13 +73,29 @@ func ParseGeminiThinkingSuffix(model string) (string, *int, *bool, bool) {
}

func NormalizeGeminiThinkingModel(modelName string) (string, map[string]any) {
baseModel, budget, include, matched := ParseGeminiThinkingSuffix(modelName)
// First try Gemini 3 reasoning effort normalization (-low/-high suffix)
workingModel := modelName
var metadata map[string]any

if base, effort, matched := ParseGemini3ReasoningEffortSuffix(modelName); matched {
workingModel = base
metadata = map[string]any{
Gemini3OriginalModelMetadataKey: modelName,
Gemini3ReasoningEffortMetadataKey: effort,
}
}

// Then try thinking suffix normalization on the (possibly already normalized) model
baseModel, budget, include, matched := ParseGeminiThinkingSuffix(workingModel)
if !matched {
return baseModel, nil
return workingModel, metadata
}
metadata := map[string]any{
GeminiOriginalModelMetadataKey: modelName,

// Merge thinking metadata
if metadata == nil {
metadata = map[string]any{}
}
metadata[GeminiOriginalModelMetadataKey] = modelName
if budget != nil {
metadata[GeminiThinkingBudgetMetadataKey] = *budget
}
@@ -268,3 +284,55 @@ func ConvertThinkingLevelToBudget(body []byte) []byte {
}
return updated
}

const (
Gemini3ReasoningEffortMetadataKey = "gemini3_reasoning_effort"
Gemini3OriginalModelMetadataKey = "gemini3_original_model"
)

// ParseGemini3ReasoningEffortSuffix parses -low or -high suffix from Gemini 3 model names.
// Returns the base model, reasoning effort ("low" or "high"), and whether a match was found.
func ParseGemini3ReasoningEffortSuffix(model string) (baseModel string, reasoningEffort string, matched bool) {
if model == "" {
return model, "", false
}
lower := strings.ToLower(model)
if !strings.HasPrefix(lower, "gemini-3-") {
return model, "", false
}

if strings.HasSuffix(lower, "-low") {
return model[:len(model)-len("-low")], "low", true
}
if strings.HasSuffix(lower, "-high") {
return model[:len(model)-len("-high")], "high", true
}
return model, "", false
}
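
Two properties of this parser are worth calling out: the gemini-3- prefix gate leaves -low/-high suffixes on other model families untouched, and while matching is case-insensitive the returned base model keeps the caller's casing. A test-style illustration, assuming it lives in the same util package; the model names are examples only.

```go
package util

import "testing"

// Illustrative only; the model names are examples, not a claim about which models exist.
func TestParseGemini3ReasoningEffortSuffixPrefixGate(t *testing.T) {
	// Non gemini-3 models are left alone even with a -low/-high suffix.
	if _, _, matched := ParseGemini3ReasoningEffortSuffix("gemini-2.5-flash-low"); matched {
		t.Fatal("expected no match for non gemini-3 model")
	}
	// Matching is case-insensitive, but the base model keeps its original casing.
	base, effort, matched := ParseGemini3ReasoningEffortSuffix("Gemini-3-Pro-Preview-HIGH")
	if !matched || effort != "high" || base != "Gemini-3-Pro-Preview" {
		t.Fatalf("got base=%q effort=%q matched=%v", base, effort, matched)
	}
}
```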

// NormalizeGemini3ReasoningEffortModel normalizes a Gemini 3 model with -low/-high suffix.
// Returns the base model name and metadata containing the reasoning effort.
func NormalizeGemini3ReasoningEffortModel(modelName string) (string, map[string]any) {
baseModel, effort, matched := ParseGemini3ReasoningEffortSuffix(modelName)
if !matched {
return modelName, nil
}
metadata := map[string]any{
Gemini3OriginalModelMetadataKey: modelName,
Gemini3ReasoningEffortMetadataKey: effort,
}
return baseModel, metadata
}

// Gemini3ReasoningEffortFromMetadata extracts reasoning effort from metadata.
func Gemini3ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
if len(metadata) == 0 {
return "", false
}
if effort, ok := metadata[Gemini3ReasoningEffortMetadataKey]; ok {
if s, ok := effort.(string); ok && s != "" {
return s, true
}
}
return "", false
}
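
Finally, a sketch of the round trip these pieces are designed for: a suffixed model is normalized to its base name plus metadata, and the executor later recovers the effort from that metadata. It continues the hypothetical util test file from the previous sketch.

```go
// Sketch of the normalize -> metadata -> extract round trip introduced in this PR.
func TestGemini3ReasoningEffortRoundTrip(t *testing.T) {
	base, meta := NormalizeGemini3ReasoningEffortModel("gemini-3-pro-preview-low")
	if base != "gemini-3-pro-preview" {
		t.Fatalf("unexpected base model %q", base)
	}
	if meta[Gemini3OriginalModelMetadataKey] != "gemini-3-pro-preview-low" {
		t.Fatalf("original model not preserved: %v", meta[Gemini3OriginalModelMetadataKey])
	}
	effort, ok := Gemini3ReasoningEffortFromMetadata(meta)
	if !ok || effort != "low" {
		t.Fatalf("expected effort \"low\", got %q (ok=%v)", effort, ok)
	}
}
```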