diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index fd8e0071..927d8ea3 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -48,16 +48,30 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "low": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) + if util.IsGemini3Model(modelName) { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingLevel", "low") + } else { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) + } out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "medium": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) + if !util.IsGemini3Model(modelName) { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) + } + // Gemini 3: no thinkingLevel for medium, uses dynamic thinking (auto) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "high": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) + if util.IsGemini3Model(modelName) { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingLevel", "high") + 
} else { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) + } out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) default: - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) + if !util.IsGemini3Model(modelName) { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) + } + // Gemini 3: no thinkingLevel for auto/default, uses dynamic thinking out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) } } @@ -88,12 +102,10 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ } } - // For gemini-3-pro-preview, always send default thinkingConfig when none specified. - // This matches the official Gemini CLI behavior which always sends: - // { thinkingBudget: -1, includeThoughts: true } - // See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts - if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) + // For Gemini 3 models, enable thought summaries when no thinkingConfig is specified. + // Don't set thinkingLevel - let API use dynamic thinking by default. 
+ // See: https://ai.google.dev/gemini-api/docs/thinking#thinking-levels + if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && util.IsGemini3Model(modelName) { out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index d14f1119..9feff2d1 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -48,16 +48,30 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "low": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) + if util.IsGemini3Model(modelName) { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingLevel", "low") + } else { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) + } out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "medium": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) + if !util.IsGemini3Model(modelName) { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) + } + // Gemini 3: no thinkingLevel for medium, uses dynamic thinking (auto) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case 
"high": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) + if util.IsGemini3Model(modelName) { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingLevel", "high") + } else { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) + } out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) default: - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) + if !util.IsGemini3Model(modelName) { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) + } + // Gemini 3: no thinkingLevel for auto/default, uses dynamic thinking out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) } } @@ -88,12 +102,10 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo } } - // For gemini-3-pro-preview, always send default thinkingConfig when none specified. - // This matches the official Gemini CLI behavior which always sends: - // { thinkingBudget: -1, includeThoughts: true } - // See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts - if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) + // For Gemini 3 models, enable thought summaries when no thinkingConfig is specified. + // Don't set thinkingLevel - let API use dynamic thinking by default. 
+ // See: https://ai.google.dev/gemini-api/docs/thinking#thinking-levels + if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && util.IsGemini3Model(modelName) { out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) } diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 22ce913e..ba2c3b32 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -398,19 +398,37 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) case "minimal": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) + if util.IsGemini3Model(modelName) { + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingLevel", "low") + } else { + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) + } out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) case "low": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096)) + if util.IsGemini3Model(modelName) { + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingLevel", "low") + } else { + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096)) + } out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) case "medium": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 
util.NormalizeThinkingBudget(modelName, 8192)) + if !util.IsGemini3Model(modelName) { + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) + } + // Gemini 3: no thinkingLevel for medium, uses dynamic thinking (auto) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) case "high": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) + if util.IsGemini3Model(modelName) { + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingLevel", "high") + } else { + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) + } out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) default: - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) + if !util.IsGemini3Model(modelName) { + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) + } + // Gemini 3: no thinkingLevel for auto/default, uses dynamic thinking out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) } } @@ -435,14 +453,11 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte } } - // For gemini-3-pro-preview, always send default thinkingConfig when none specified. - // This matches the official Gemini CLI behavior which always sends: - // { thinkingBudget: -1, includeThoughts: true } - // See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts - if !gjson.Get(out, "generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" { - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) + // For Gemini 3 models, enable thought summaries when no thinkingConfig is specified. + // Don't set thinkingLevel - let API use dynamic thinking by default. 
// IsGemini3Model reports whether model names a Gemini 3 family model, i.e. a
// model that is configured through thinkingConfig.thinkingLevel instead of
// thinkingConfig.thinkingBudget.
//
// The check is a case-insensitive match on the "gemini-3-" prefix. Surrounding
// whitespace and a leading "models/" resource qualifier are ignored, so both
// "gemini-3-pro-preview" and "models/gemini-3-pro-preview" are recognized.
// Names that merely start with "gemini-3" without the trailing dash (for
// example "gemini-30-x" or a bare "gemini-3") are not treated as Gemini 3.
func IsGemini3Model(model string) bool {
	// Normalize once so the prefix test below is independent of casing,
	// padding, and the optional resource-name qualifier.
	name := strings.ToLower(strings.TrimSpace(model))
	name = strings.TrimPrefix(name, "models/")
	return strings.HasPrefix(name, "gemini-3-")
}