From 6b64cb09082ca1df67cbd5aab6ee6d0ca7688e1f Mon Sep 17 00:00:00 2001 From: Aero Date: Sun, 30 Nov 2025 14:43:49 +0800 Subject: [PATCH 01/10] chore: print server port --- sdk/cliproxy/service.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index c2ebba8d..93beae78 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -498,7 +498,7 @@ func (s *Service) Run(ctx context.Context) error { }() time.Sleep(100 * time.Millisecond) - fmt.Println("API server started successfully") + fmt.Printf("API server started successfully on: %d\n", s.cfg.Port) if s.hooks.OnAfterStart != nil { s.hooks.OnAfterStart(s) From 4fa52992b42d30f1e9c0f7d0e1abdbe3bd6890d0 Mon Sep 17 00:00:00 2001 From: Aero Date: Sun, 30 Nov 2025 15:35:49 +0800 Subject: [PATCH 02/10] feat: add vertex compat service --- internal/config/config.go | 8 + internal/config/vertex_compat.go | 60 +++ .../executor/vertex_compat_executor.go | 375 ++++++++++++++++++ internal/watcher/watcher.go | 28 ++ sdk/auth/vertex_compat.go | 94 +++++ sdk/cliproxy/service.go | 5 + 6 files changed, 570 insertions(+) create mode 100644 internal/config/vertex_compat.go create mode 100644 internal/runtime/executor/vertex_compat_executor.go create mode 100644 sdk/auth/vertex_compat.go diff --git a/internal/config/config.go b/internal/config/config.go index 97b5a0c2..473a0553 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -64,6 +64,10 @@ type Config struct { // GeminiKey defines Gemini API key configurations with optional routing overrides. GeminiKey []GeminiKey `yaml:"gemini-api-key" json:"gemini-api-key"` + // VertexCompatAPIKey defines Vertex AI-compatible API key configurations for third-party providers. + // Used for services that use Vertex AI-style paths but with simple API key authentication. + VertexCompatAPIKey []VertexCompatKey `yaml:"vertex-api-key" json:"vertex-api-key"` + // RequestRetry defines the retry times when the request failed. RequestRetry int `yaml:"request-retry" json:"request-retry"` // MaxRetryInterval defines the maximum wait time in seconds before retrying a cooled-down credential. @@ -325,6 +329,9 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) { // Sanitize Gemini API key configuration and migrate legacy entries. cfg.SanitizeGeminiKeys() + // Sanitize Vertex-compatible API keys: drop entries without base-url + cfg.SanitizeVertexCompatKeys() + // Sanitize Codex keys: drop entries without base-url cfg.SanitizeCodexKeys() @@ -813,6 +820,7 @@ func shouldSkipEmptyCollectionOnPersist(key string, node *yaml.Node) bool { switch key { case "generative-language-api-key", "gemini-api-key", + "vertex-api-key", "claude-api-key", "codex-api-key", "openai-compatibility": diff --git a/internal/config/vertex_compat.go b/internal/config/vertex_compat.go new file mode 100644 index 00000000..6451abdb --- /dev/null +++ b/internal/config/vertex_compat.go @@ -0,0 +1,60 @@ +package config + +import "strings" + +// VertexCompatKey represents the configuration for Vertex AI-compatible API keys. +// This supports third-party services that use Vertex AI-style endpoint paths +// (/publishers/google/models/{model}:streamGenerateContent) but authenticate +// with simple API keys instead of Google Cloud service account credentials. +// +// Example services: zenmux.ai and similar Vertex-compatible providers. +type VertexCompatKey struct { + // APIKey is the authentication key for accessing the Vertex-compatible API. + // Maps to the x-goog-api-key header. + APIKey string `yaml:"api-key" json:"api-key"` + + // BaseURL is the base URL for the Vertex-compatible API endpoint. + // The executor will append "/v1/publishers/google/models/{model}:action" to this. + // Example: "https://zenmux.ai/api" becomes "https://zenmux.ai/api/v1/publishers/google/models/..." + BaseURL string `yaml:"base-url,omitempty" json:"base-url,omitempty"` + + // ProxyURL optionally overrides the global proxy for this API key. + ProxyURL string `yaml:"proxy-url,omitempty" json:"proxy-url,omitempty"` + + // Headers optionally adds extra HTTP headers for requests sent with this key. + // Commonly used for cookies, user-agent, and other authentication headers. + Headers map[string]string `yaml:"headers,omitempty" json:"headers,omitempty"` +} + +// SanitizeVertexCompatKeys deduplicates and normalizes Vertex-compatible API key credentials. +func (cfg *Config) SanitizeVertexCompatKeys() { + if cfg == nil { + return + } + + seen := make(map[string]struct{}, len(cfg.VertexCompatAPIKey)) + out := cfg.VertexCompatAPIKey[:0] + for i := range cfg.VertexCompatAPIKey { + entry := cfg.VertexCompatAPIKey[i] + entry.APIKey = strings.TrimSpace(entry.APIKey) + if entry.APIKey == "" { + continue + } + entry.BaseURL = strings.TrimSpace(entry.BaseURL) + if entry.BaseURL == "" { + // BaseURL is required for vertex-compat keys + continue + } + entry.ProxyURL = strings.TrimSpace(entry.ProxyURL) + entry.Headers = NormalizeHeaders(entry.Headers) + + // Use API key + base URL as uniqueness key + uniqueKey := entry.APIKey + "|" + entry.BaseURL + if _, exists := seen[uniqueKey]; exists { + continue + } + seen[uniqueKey] = struct{}{} + out = append(out, entry) + } + cfg.VertexCompatAPIKey = out +} diff --git a/internal/runtime/executor/vertex_compat_executor.go b/internal/runtime/executor/vertex_compat_executor.go new file mode 100644 index 00000000..b513f94d --- /dev/null +++ b/internal/runtime/executor/vertex_compat_executor.go @@ -0,0 +1,375 @@ +// Package executor provides runtime execution capabilities for various AI service providers. +package executor + +import ( + "bufio" + "bytes" + "context" + "fmt" + "io" + "net/http" + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" + cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" + sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" + log "github.com/sirupsen/logrus" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +const ( + // vertexCompatAPIVersion is the API version for Vertex-compatible endpoints. + vertexCompatAPIVersion = "v1" +) + +// VertexCompatExecutor is a stateless executor for Vertex AI-compatible APIs +// that use Vertex-style paths (/publishers/google/models/{model}:action) +// but authenticate with simple API keys instead of Google Cloud service accounts. +// +// This executor supports third-party providers like zenmux.ai that mimic +// Vertex AI's URL structure while using simpler authentication mechanisms. +type VertexCompatExecutor struct { + cfg *config.Config +} + +// NewVertexCompatExecutor creates a new Vertex-compatible executor instance. +func NewVertexCompatExecutor(cfg *config.Config) *VertexCompatExecutor { + return &VertexCompatExecutor{cfg: cfg} +} + +// Identifier returns the executor identifier for routing. +func (e *VertexCompatExecutor) Identifier() string { return "vertex-compat" } + +// PrepareRequest is a no-op for API key based authentication. +func (e *VertexCompatExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error { + return nil +} + +// Execute performs a non-streaming request to the Vertex-compatible API. +func (e *VertexCompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + apiKey, baseURL := vertexCompatCreds(auth) + + reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + defer reporter.trackFailure(ctx, &err) + + from := opts.SourceFormat + to := sdktranslator.FromString("gemini") + body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + + // Apply thinking config if supported + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } + body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) + } + body = util.StripThinkingConfigIfUnsupported(req.Model, body) + body = applyPayloadConfig(e.cfg, req.Model, body) + + action := "generateContent" + if req.Metadata != nil { + if a, _ := req.Metadata["action"].(string); a == "countTokens" { + action = "countTokens" + } + } + + // Construct Vertex-style URL: {baseURL}/v1/publishers/google/models/{model}:action + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexCompatAPIVersion, req.Model, action) + if opts.Alt != "" && action != "countTokens" { + url = url + fmt.Sprintf("?$alt=%s", opts.Alt) + } + body, _ = sjson.DeleteBytes(body, "session_id") + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) + if err != nil { + return resp, err + } + httpReq.Header.Set("Content-Type", "application/json") + if apiKey != "" { + httpReq.Header.Set("x-goog-api-key", apiKey) + } + applyVertexCompatHeaders(httpReq, auth) + + var authID, authLabel, authType, authValue string + if auth != nil { + authID = auth.ID + authLabel = auth.Label + authType, authValue = auth.AccountInfo() + } + recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + URL: url, + Method: http.MethodPost, + Headers: httpReq.Header.Clone(), + Body: body, + Provider: e.Identifier(), + AuthID: authID, + AuthLabel: authLabel, + AuthType: authType, + AuthValue: authValue, + }) + + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpResp, err := httpClient.Do(httpReq) + if err != nil { + recordAPIResponseError(ctx, e.cfg, err) + return resp, err + } + defer func() { + if errClose := httpResp.Body.Close(); errClose != nil { + log.Errorf("vertex-compat executor: close response body error: %v", errClose) + } + }() + recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + + if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { + b, _ := io.ReadAll(httpResp.Body) + appendAPIResponseChunk(ctx, e.cfg, b) + log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + err = statusErr{code: httpResp.StatusCode, msg: string(b)} + return resp, err + } + + data, err := io.ReadAll(httpResp.Body) + if err != nil { + recordAPIResponseError(ctx, e.cfg, err) + return resp, err + } + appendAPIResponseChunk(ctx, e.cfg, data) + reporter.publish(ctx, parseGeminiUsage(data)) + + var param any + out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m) + resp = cliproxyexecutor.Response{Payload: []byte(out)} + return resp, nil +} + +// ExecuteStream handles SSE streaming for Vertex-compatible APIs. +func (e *VertexCompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + apiKey, baseURL := vertexCompatCreds(auth) + + reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + defer reporter.trackFailure(ctx, &err) + + from := opts.SourceFormat + to := sdktranslator.FromString("gemini") + body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + + // Apply thinking config if supported + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } + body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) + } + body = util.StripThinkingConfigIfUnsupported(req.Model, body) + body = applyPayloadConfig(e.cfg, req.Model, body) + + // Construct Vertex-style streaming URL + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexCompatAPIVersion, req.Model, "streamGenerateContent") + if opts.Alt == "" { + url = url + "?alt=sse" + } else { + url = url + fmt.Sprintf("?$alt=%s", opts.Alt) + } + body, _ = sjson.DeleteBytes(body, "session_id") + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) + if err != nil { + return nil, err + } + httpReq.Header.Set("Content-Type", "application/json") + if apiKey != "" { + httpReq.Header.Set("x-goog-api-key", apiKey) + } + applyVertexCompatHeaders(httpReq, auth) + + var authID, authLabel, authType, authValue string + if auth != nil { + authID = auth.ID + authLabel = auth.Label + authType, authValue = auth.AccountInfo() + } + recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + URL: url, + Method: http.MethodPost, + Headers: httpReq.Header.Clone(), + Body: body, + Provider: e.Identifier(), + AuthID: authID, + AuthLabel: authLabel, + AuthType: authType, + AuthValue: authValue, + }) + + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpResp, err := httpClient.Do(httpReq) + if err != nil { + recordAPIResponseError(ctx, e.cfg, err) + return nil, err + } + recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + + if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { + b, _ := io.ReadAll(httpResp.Body) + appendAPIResponseChunk(ctx, e.cfg, b) + log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + if errClose := httpResp.Body.Close(); errClose != nil { + log.Errorf("vertex-compat executor: close response body error: %v", errClose) + } + return nil, statusErr{code: httpResp.StatusCode, msg: string(b)} + } + + out := make(chan cliproxyexecutor.StreamChunk) + stream = out + go func() { + defer close(out) + defer func() { + if errClose := httpResp.Body.Close(); errClose != nil { + log.Errorf("vertex-compat executor: close response body error: %v", errClose) + } + }() + scanner := bufio.NewScanner(httpResp.Body) + scanner.Buffer(nil, 20_971_520) + var param any + for scanner.Scan() { + line := scanner.Bytes() + appendAPIResponseChunk(ctx, e.cfg, line) + filtered := FilterSSEUsageMetadata(line) + payload := jsonPayload(filtered) + if len(payload) == 0 { + continue + } + if detail, ok := parseGeminiStreamUsage(payload); ok { + reporter.publish(ctx, detail) + } + lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(payload), ¶m) + for i := range lines { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])} + } + } + lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone([]byte("[DONE]")), ¶m) + for i := range lines { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])} + } + if errScan := scanner.Err(); errScan != nil { + recordAPIResponseError(ctx, e.cfg, errScan) + reporter.publishFailure(ctx) + out <- cliproxyexecutor.StreamChunk{Err: errScan} + } + }() + return stream, nil +} + +// CountTokens calls the Vertex-compatible countTokens endpoint. +func (e *VertexCompatExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + apiKey, baseURL := vertexCompatCreds(auth) + + from := opts.SourceFormat + to := sdktranslator.FromString("gemini") + translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } + translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) + } + translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) + + respCtx := ctx + if opts.Alt != "" { + respCtx = context.WithValue(ctx, "alt", opts.Alt) + } + translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") + translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") + translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") + + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexCompatAPIVersion, req.Model, "countTokens") + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translatedReq)) + if err != nil { + return cliproxyexecutor.Response{}, err + } + httpReq.Header.Set("Content-Type", "application/json") + if apiKey != "" { + httpReq.Header.Set("x-goog-api-key", apiKey) + } + applyVertexCompatHeaders(httpReq, auth) + + var authID, authLabel, authType, authValue string + if auth != nil { + authID = auth.ID + authLabel = auth.Label + authType, authValue = auth.AccountInfo() + } + recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + URL: url, + Method: http.MethodPost, + Headers: httpReq.Header.Clone(), + Body: translatedReq, + Provider: e.Identifier(), + AuthID: authID, + AuthLabel: authLabel, + AuthType: authType, + AuthValue: authValue, + }) + + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + resp, err := httpClient.Do(httpReq) + if err != nil { + recordAPIResponseError(ctx, e.cfg, err) + return cliproxyexecutor.Response{}, err + } + defer func() { _ = resp.Body.Close() }() + recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone()) + + data, err := io.ReadAll(resp.Body) + if err != nil { + recordAPIResponseError(ctx, e.cfg, err) + return cliproxyexecutor.Response{}, err + } + appendAPIResponseChunk(ctx, e.cfg, data) + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + log.Debugf("request error, error status: %d, error body: %s", resp.StatusCode, summarizeErrorBody(resp.Header.Get("Content-Type"), data)) + return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(data)} + } + + count := gjson.GetBytes(data, "totalTokens").Int() + translated := sdktranslator.TranslateTokenCount(respCtx, to, from, count, data) + return cliproxyexecutor.Response{Payload: []byte(translated)}, nil +} + +// Refresh is a no-op for API key based authentication. +func (e *VertexCompatExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) { + return auth, nil +} + +// vertexCompatCreds extracts API key and base URL from auth attributes. +func vertexCompatCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) { + if a == nil || a.Attributes == nil { + return "", "" + } + if v := a.Attributes["api_key"]; v != "" { + apiKey = v + } + if v := a.Attributes["base_url"]; v != "" { + baseURL = strings.TrimRight(strings.TrimSpace(v), "/") + } + return +} + +// applyVertexCompatHeaders applies custom headers from auth attributes. +func applyVertexCompatHeaders(req *http.Request, auth *cliproxyauth.Auth) { + var attrs map[string]string + if auth != nil { + attrs = auth.Attributes + } + util.ApplyCustomHeadersFromAttrs(req, attrs) +} diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go index a284541a..273ba105 100644 --- a/internal/watcher/watcher.go +++ b/internal/watcher/watcher.go @@ -1074,6 +1074,34 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { applyAuthExcludedModelsMeta(a, cfg, entry.ExcludedModels, "apikey") out = append(out, a) } + // Vertex-compatible API keys -> synthesize auths + for i := range cfg.VertexCompatAPIKey { + entry := cfg.VertexCompatAPIKey[i] + key := strings.TrimSpace(entry.APIKey) + base := strings.TrimSpace(entry.BaseURL) + if key == "" || base == "" { + continue + } + proxyURL := strings.TrimSpace(entry.ProxyURL) + id, token := idGen.next("vertex-compat:apikey", key, base) + attrs := map[string]string{ + "source": fmt.Sprintf("config:vertex-compat[%s]", token), + "api_key": key, + "base_url": base, + } + addConfigHeadersToAttrs(entry.Headers, attrs) + a := &coreauth.Auth{ + ID: id, + Provider: "vertex-compat", + Label: fmt.Sprintf("vertex-compat-%s", token), + Status: coreauth.StatusActive, + ProxyURL: proxyURL, + Attributes: attrs, + CreatedAt: now, + UpdatedAt: now, + } + out = append(out, a) + } // Claude API keys -> synthesize auths for i := range cfg.ClaudeKey { ck := cfg.ClaudeKey[i] diff --git a/sdk/auth/vertex_compat.go b/sdk/auth/vertex_compat.go new file mode 100644 index 00000000..66dff632 --- /dev/null +++ b/sdk/auth/vertex_compat.go @@ -0,0 +1,94 @@ +package auth + +import ( + "context" + "fmt" + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" +) + +// LoadVertexCompatCredentials loads Vertex AI-compatible API key credentials from config +// into the auth manager. Each entry becomes an in-memory auth entry with API key and +// custom headers stored in attributes. +func LoadVertexCompatCredentials(ctx context.Context, cfg *config.Config, mgr *coreauth.Manager) error { + if cfg == nil || mgr == nil { + return nil + } + if len(cfg.VertexCompatAPIKey) == 0 { + return nil + } + + for i, entry := range cfg.VertexCompatAPIKey { + apiKey := strings.TrimSpace(entry.APIKey) + baseURL := strings.TrimSpace(entry.BaseURL) + + if apiKey == "" || baseURL == "" { + continue + } + + // Create unique ID from index and base URL + id := fmt.Sprintf("vertex-compat-%d-%s", i, sanitizeForID(baseURL)) + label := fmt.Sprintf("Vertex-Compat (%s)", extractDomain(baseURL)) + + // Build attributes map + attrs := make(map[string]string) + attrs["api_key"] = apiKey + attrs["base_url"] = baseURL + + // Add proxy URL if specified + if entry.ProxyURL != "" { + attrs["proxy_url"] = strings.TrimSpace(entry.ProxyURL) + } + + // Copy custom headers to attributes with "header_" prefix + for k, v := range entry.Headers { + headerKey := "header_" + strings.ToLower(strings.TrimSpace(k)) + attrs[headerKey] = strings.TrimSpace(v) + } + + auth := &coreauth.Auth{ + ID: id, + Provider: "vertex-compat", + Label: label, + Attributes: attrs, + Metadata: make(map[string]any), + } + + if _, err := mgr.Register(ctx, auth); err != nil { + return fmt.Errorf("failed to register vertex-compat credential %d: %w", i, err) + } + } + + return nil +} + +// sanitizeForID creates a safe ID component from a URL. +func sanitizeForID(url string) string { + // Remove https:// and http:// + clean := strings.TrimPrefix(url, "https://") + clean = strings.TrimPrefix(clean, "http://") + // Replace non-alphanumeric with dash + clean = strings.Map(func(r rune) rune { + if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') { + return r + } + return '-' + }, clean) + // Limit length + if len(clean) > 30 { + clean = clean[:30] + } + return strings.Trim(clean, "-") +} + +// extractDomain extracts the domain from a URL for display purposes. +func extractDomain(url string) string { + clean := strings.TrimPrefix(url, "https://") + clean = strings.TrimPrefix(clean, "http://") + if idx := strings.Index(clean, "/"); idx > 0 { + clean = clean[:idx] + } + return clean +} diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index 93beae78..f32f90a7 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -362,6 +362,8 @@ func (s *Service) ensureExecutorsForAuth(a *coreauth.Auth) { s.coreManager.RegisterExecutor(executor.NewGeminiExecutor(s.cfg)) case "vertex": s.coreManager.RegisterExecutor(executor.NewGeminiVertexExecutor(s.cfg)) + case "vertex-compat": + s.coreManager.RegisterExecutor(executor.NewVertexCompatExecutor(s.cfg)) case "gemini-cli": s.coreManager.RegisterExecutor(executor.NewGeminiCLIExecutor(s.cfg)) case "aistudio": @@ -680,6 +682,9 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) { // Vertex AI Gemini supports the same model identifiers as Gemini. models = registry.GetGeminiVertexModels() models = applyExcludedModels(models, excluded) + case "vertex-compat": + // Vertex-compatible providers support the same model identifiers as Gemini. + models = registry.GetGeminiModels() case "gemini-cli": models = registry.GetGeminiCLIModels() models = applyExcludedModels(models, excluded) From a298c0a7b1c4fb1cc0d71f30b58db36c08399e1f Mon Sep 17 00:00:00 2001 From: Aero Date: Sun, 30 Nov 2025 15:48:49 +0800 Subject: [PATCH 03/10] chore: print vertex compat service --- internal/watcher/watcher.go | 17 +++++++++++------ sdk/cliproxy/providers.go | 11 ++++++----- sdk/cliproxy/types.go | 14 ++++++++------ 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go index 273ba105..9fb76e69 100644 --- a/internal/watcher/watcher.go +++ b/internal/watcher/watcher.go @@ -902,8 +902,8 @@ func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string // no legacy clients to unregister // Create new API key clients based on the new config - geminiAPIKeyCount, claudeAPIKeyCount, codexAPIKeyCount, openAICompatCount := BuildAPIKeyClients(cfg) - totalAPIKeyClients := geminiAPIKeyCount + claudeAPIKeyCount + codexAPIKeyCount + openAICompatCount + geminiAPIKeyCount, vertexCompatAPIKeyCount, claudeAPIKeyCount, codexAPIKeyCount, openAICompatCount := BuildAPIKeyClients(cfg) + totalAPIKeyClients := geminiAPIKeyCount + vertexCompatAPIKeyCount + claudeAPIKeyCount + codexAPIKeyCount + openAICompatCount log.Debugf("loaded %d API key clients", totalAPIKeyClients) var authFileCount int @@ -946,7 +946,7 @@ func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string w.clientsMutex.Unlock() } - totalNewClients := authFileCount + geminiAPIKeyCount + claudeAPIKeyCount + codexAPIKeyCount + openAICompatCount + totalNewClients := authFileCount + geminiAPIKeyCount + vertexCompatAPIKeyCount + claudeAPIKeyCount + codexAPIKeyCount + openAICompatCount // Ensure consumers observe the new configuration before auth updates dispatch. if w.reloadCallback != nil { @@ -956,10 +956,11 @@ func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string w.refreshAuthState() - log.Infof("full client load complete - %d clients (%d auth files + %d Gemini API keys + %d Claude API keys + %d Codex keys + %d OpenAI-compat)", + log.Infof("full client load complete - %d clients (%d auth files + %d Gemini API keys + %d Vertex-compat keys + %d Claude API keys + %d Codex keys + %d OpenAI-compat)", totalNewClients, authFileCount, geminiAPIKeyCount, + vertexCompatAPIKeyCount, claudeAPIKeyCount, codexAPIKeyCount, openAICompatCount, @@ -1484,8 +1485,9 @@ func (w *Watcher) loadFileClients(cfg *config.Config) int { return authFileCount } -func BuildAPIKeyClients(cfg *config.Config) (int, int, int, int) { +func BuildAPIKeyClients(cfg *config.Config) (int, int, int, int, int) { geminiAPIKeyCount := 0 + vertexCompatAPIKeyCount := 0 claudeAPIKeyCount := 0 codexAPIKeyCount := 0 openAICompatCount := 0 @@ -1494,6 +1496,9 @@ func BuildAPIKeyClients(cfg *config.Config) (int, int, int, int) { // Stateless executor handles Gemini API keys; avoid constructing legacy clients. geminiAPIKeyCount += len(cfg.GeminiKey) } + if len(cfg.VertexCompatAPIKey) > 0 { + vertexCompatAPIKeyCount += len(cfg.VertexCompatAPIKey) + } if len(cfg.ClaudeKey) > 0 { claudeAPIKeyCount += len(cfg.ClaudeKey) } @@ -1511,7 +1516,7 @@ func BuildAPIKeyClients(cfg *config.Config) (int, int, int, int) { } } } - return geminiAPIKeyCount, claudeAPIKeyCount, codexAPIKeyCount, openAICompatCount + return geminiAPIKeyCount, vertexCompatAPIKeyCount, claudeAPIKeyCount, codexAPIKeyCount, openAICompatCount } func diffOpenAICompatibility(oldList, newList []config.OpenAICompatibility) []string { diff --git a/sdk/cliproxy/providers.go b/sdk/cliproxy/providers.go index a5810336..401885f5 100644 --- a/sdk/cliproxy/providers.go +++ b/sdk/cliproxy/providers.go @@ -29,7 +29,7 @@ func NewAPIKeyClientProvider() APIKeyClientProvider { type apiKeyClientProvider struct{} func (p *apiKeyClientProvider) Load(ctx context.Context, cfg *config.Config) (*APIKeyClientResult, error) { - geminiCount, claudeCount, codexCount, openAICompat := watcher.BuildAPIKeyClients(cfg) + geminiCount, vertexCompatCount, claudeCount, codexCount, openAICompat := watcher.BuildAPIKeyClients(cfg) if ctx != nil { select { case <-ctx.Done(): @@ -38,9 +38,10 @@ func (p *apiKeyClientProvider) Load(ctx context.Context, cfg *config.Config) (*A } } return &APIKeyClientResult{ - GeminiKeyCount: geminiCount, - ClaudeKeyCount: claudeCount, - CodexKeyCount: codexCount, - OpenAICompatCount: openAICompat, + GeminiKeyCount: geminiCount, + VertexCompatKeyCount: vertexCompatCount, + ClaudeKeyCount: claudeCount, + CodexKeyCount: codexCount, + OpenAICompatCount: openAICompat, }, nil } diff --git a/sdk/cliproxy/types.go b/sdk/cliproxy/types.go index b44185d1..42c7c488 100644 --- a/sdk/cliproxy/types.go +++ b/sdk/cliproxy/types.go @@ -49,19 +49,21 @@ type APIKeyClientProvider interface { Load(ctx context.Context, cfg *config.Config) (*APIKeyClientResult, error) } -// APIKeyClientResult contains API key based clients along with type counts. -// It provides metadata about the number of clients loaded for each provider type. +// APIKeyClientResult is returned by APIKeyClientProvider.Load() type APIKeyClientResult struct { - // GeminiKeyCount is the number of Gemini API key clients loaded. + // GeminiKeyCount is the number of Gemini API keys loaded GeminiKeyCount int - // ClaudeKeyCount is the number of Claude API key clients loaded. + // VertexCompatKeyCount is the number of Vertex-compatible API keys loaded + VertexCompatKeyCount int + + // ClaudeKeyCount is the number of Claude API keys loaded ClaudeKeyCount int - // CodexKeyCount is the number of Codex API key clients loaded. + // CodexKeyCount is the number of Codex API keys loaded CodexKeyCount int - // OpenAICompatCount is the number of OpenAI-compatible API key clients loaded. + // OpenAICompatCount is the number of OpenAI compatibility API keys loaded OpenAICompatCount int } From 696f391736fde533f63fa0b03788d9b13189e3f8 Mon Sep 17 00:00:00 2001 From: Aero Date: Sun, 30 Nov 2025 15:52:20 +0800 Subject: [PATCH 04/10] chore: print vertexAICompat --- internal/api/server.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/api/server.go b/internal/api/server.go index ab9c0354..b3c3bd26 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -921,6 +921,7 @@ func (s *Server) UpdateClients(cfg *config.Config) { geminiAPIKeyCount := len(cfg.GeminiKey) claudeAPIKeyCount := len(cfg.ClaudeKey) codexAPIKeyCount := len(cfg.CodexKey) + vertexAICompatCount := len(cfg.VertexCompatAPIKey) openAICompatCount := 0 for i := range cfg.OpenAICompatibility { entry := cfg.OpenAICompatibility[i] @@ -931,13 +932,14 @@ func (s *Server) UpdateClients(cfg *config.Config) { openAICompatCount += len(entry.APIKeys) } - total := authFiles + geminiAPIKeyCount + claudeAPIKeyCount + codexAPIKeyCount + openAICompatCount - fmt.Printf("server clients and configuration updated: %d clients (%d auth files + %d Gemini API keys + %d Claude API keys + %d Codex keys + %d OpenAI-compat)\n", + total := authFiles + geminiAPIKeyCount + claudeAPIKeyCount + codexAPIKeyCount + vertexAICompatCount + openAICompatCount + fmt.Printf("server clients and configuration updated: %d clients (%d auth files + %d Gemini API keys + %d Claude API keys + %d Codex keys + %d Vertex-compat + %d OpenAI-compat)\n", total, authFiles, geminiAPIKeyCount, claudeAPIKeyCount, codexAPIKeyCount, + vertexAICompatCount, openAICompatCount, ) } From 4b955c019ba4dfd184cd3b9eed15ca7285d73fc1 Mon Sep 17 00:00:00 2001 From: Aero Date: Sun, 30 Nov 2025 16:18:14 +0800 Subject: [PATCH 05/10] feat: support custom models for vertex-compat --- internal/config/vertex_compat.go | 24 ++++++++++++++++ internal/watcher/watcher.go | 48 ++++++++++++++++++++++++++++++++ sdk/cliproxy/service.go | 30 ++++++++++++++++++-- 3 files changed, 100 insertions(+), 2 deletions(-) diff --git a/internal/config/vertex_compat.go b/internal/config/vertex_compat.go index 6451abdb..a8d94ccb 100644 --- a/internal/config/vertex_compat.go +++ b/internal/config/vertex_compat.go @@ -24,6 +24,19 @@ type VertexCompatKey struct { // Headers optionally adds extra HTTP headers for requests sent with this key. // Commonly used for cookies, user-agent, and other authentication headers. Headers map[string]string `yaml:"headers,omitempty" json:"headers,omitempty"` + + // Models defines the model configurations including aliases for routing. + Models []VertexCompatModel `yaml:"models,omitempty" json:"models,omitempty"` +} + +// VertexCompatModel represents a model configuration for Vertex compatibility, +// including the actual model name and its alias for API routing. +type VertexCompatModel struct { + // Name is the actual model name used by the external provider. + Name string `yaml:"name" json:"name"` + + // Alias is the model name alias that clients will use to reference this model. + Alias string `yaml:"alias" json:"alias"` } // SanitizeVertexCompatKeys deduplicates and normalizes Vertex-compatible API key credentials. @@ -48,6 +61,17 @@ func (cfg *Config) SanitizeVertexCompatKeys() { entry.ProxyURL = strings.TrimSpace(entry.ProxyURL) entry.Headers = NormalizeHeaders(entry.Headers) + // Sanitize models: remove entries without valid alias + sanitizedModels := make([]VertexCompatModel, 0, len(entry.Models)) + for _, model := range entry.Models { + model.Alias = strings.TrimSpace(model.Alias) + model.Name = strings.TrimSpace(model.Name) + if model.Alias != "" && model.Name != "" { + sanitizedModels = append(sanitizedModels, model) + } + } + entry.Models = sanitizedModels + // Use API key + base URL as uniqueness key uniqueKey := entry.APIKey + "|" + entry.BaseURL if _, exists := seen[uniqueKey]; exists { diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go index 9fb76e69..a4ec2aea 100644 --- a/internal/watcher/watcher.go +++ b/internal/watcher/watcher.go @@ -496,6 +496,18 @@ func computeOpenAICompatModelsHash(models []config.OpenAICompatibilityModel) str return hex.EncodeToString(sum[:]) } +func computeVertexCompatModelsHash(models []config.VertexCompatModel) string { + if len(models) == 0 { + return "" + } + data, err := json.Marshal(models) + if err != nil || len(data) == 0 { + return "" + } + sum := sha256.Sum256(data) + return hex.EncodeToString(sum[:]) +} + // computeClaudeModelsHash returns a stable hash for Claude model aliases. func computeClaudeModelsHash(models []config.ClaudeModel) string { if len(models) == 0 { @@ -1269,6 +1281,42 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { } } } + + // Process Vertex compatibility providers + for i := range cfg.VertexCompatAPIKey { + compat := &cfg.VertexCompatAPIKey[i] + providerName := "vertex-compat" + base := strings.TrimSpace(compat.BaseURL) + + key := strings.TrimSpace(compat.APIKey) + proxyURL := strings.TrimSpace(compat.ProxyURL) + idKind := fmt.Sprintf("vertex-compatibility:%s", base) + id, token := idGen.next(idKind, key, base, proxyURL) + attrs := map[string]string{ + "source": fmt.Sprintf("config:vertex-compatibility[%s]", token), + "base_url": base, + "provider_key": providerName, + } + if key != "" { + attrs["api_key"] = key + } + if hash := computeVertexCompatModelsHash(compat.Models); hash != "" { + attrs["models_hash"] = hash + } + addConfigHeadersToAttrs(compat.Headers, attrs) + a := &coreauth.Auth{ + ID: id, + Provider: providerName, + Label: "Vertex Compatibility", + Status: coreauth.StatusActive, + ProxyURL: proxyURL, + Attributes: attrs, + CreatedAt: now, + UpdatedAt: now, + } + out = append(out, a) + } + // Also synthesize auth entries directly from auth files (for OAuth/file-backed providers) entries, _ := os.ReadDir(w.authDir) for _, e := range entries { diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index f32f90a7..1383c480 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -683,8 +683,34 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) { models = registry.GetGeminiVertexModels() models = applyExcludedModels(models, excluded) case "vertex-compat": - // Vertex-compatible providers support the same model identifiers as Gemini. - models = registry.GetGeminiModels() + // Handle Vertex AI compatibility providers with custom model definitions + if s.cfg != nil && len(s.cfg.VertexCompatAPIKey) > 0 { + // Create models for all Vertex compatibility providers + allModels := make([]*ModelInfo, 0) + for i := range s.cfg.VertexCompatAPIKey { + compat := &s.cfg.VertexCompatAPIKey[i] + for j := range compat.Models { + m := compat.Models[j] + // Use alias as model ID, fallback to name if alias is empty + modelID := m.Alias + if modelID == "" { + modelID = m.Name + } + if modelID != "" { + allModels = append(allModels, &ModelInfo{ + ID: modelID, + Object: "model", + Created: time.Now().Unix(), + OwnedBy: "vertex-compat", + Type: "vertex-compat", + DisplayName: m.Name, + }) + } + } + } + models = allModels + } + case "gemini-cli": models = registry.GetGeminiCLIModels() models = applyExcludedModels(models, excluded) From 0acd9d98dc6406a490d18ab8e8a4fdb418b05817 Mon Sep 17 00:00:00 2001 From: Aero Date: Sun, 30 Nov 2025 16:22:19 +0800 Subject: [PATCH 06/10] fix: openAI-compat -> vertexAI-compat --- sdk/cliproxy/service.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index 1383c480..facd24aa 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -324,7 +324,7 @@ func openAICompatInfoFromAuth(a *coreauth.Auth) (providerKey string, compatName if len(a.Attributes) > 0 { providerKey = strings.TrimSpace(a.Attributes["provider_key"]) compatName = strings.TrimSpace(a.Attributes["compat_name"]) - if providerKey != "" || compatName != "" { + if compatName != "" { if providerKey == "" { providerKey = compatName } From f9607c448df312d82efdd3127f99d73211529a23 Mon Sep 17 00:00:00 2001 From: Aero Date: Sun, 30 Nov 2025 16:45:16 +0800 Subject: [PATCH 07/10] fix: remove duplicated logic --- internal/watcher/watcher.go | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go index a4ec2aea..6ecf88a3 100644 --- a/internal/watcher/watcher.go +++ b/internal/watcher/watcher.go @@ -1087,34 +1087,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth { applyAuthExcludedModelsMeta(a, cfg, entry.ExcludedModels, "apikey") out = append(out, a) } - // Vertex-compatible API keys -> synthesize auths - for i := range cfg.VertexCompatAPIKey { - entry := cfg.VertexCompatAPIKey[i] - key := strings.TrimSpace(entry.APIKey) - base := strings.TrimSpace(entry.BaseURL) - if key == "" || base == "" { - continue - } - proxyURL := strings.TrimSpace(entry.ProxyURL) - id, token := idGen.next("vertex-compat:apikey", key, base) - attrs := map[string]string{ - "source": fmt.Sprintf("config:vertex-compat[%s]", token), - "api_key": key, - "base_url": base, - } - addConfigHeadersToAttrs(entry.Headers, attrs) - a := &coreauth.Auth{ - ID: id, - Provider: "vertex-compat", - Label: fmt.Sprintf("vertex-compat-%s", token), - Status: coreauth.StatusActive, - ProxyURL: proxyURL, - Attributes: attrs, - CreatedAt: now, - UpdatedAt: now, - } - out = append(out, a) - } + // Claude API keys -> synthesize auths for i := range cfg.ClaudeKey { ck := cfg.ClaudeKey[i] From c4d45fff09df13db2b98af0c1eece6545837324f Mon Sep 17 00:00:00 2001 From: Aero Date: Sun, 30 Nov 2025 16:55:18 +0800 Subject: [PATCH 08/10] fix: remove unused code --- sdk/auth/vertex_compat.go | 94 --------------------------------------- 1 file changed, 94 deletions(-) delete mode 100644 sdk/auth/vertex_compat.go diff --git a/sdk/auth/vertex_compat.go b/sdk/auth/vertex_compat.go deleted file mode 100644 index 66dff632..00000000 --- a/sdk/auth/vertex_compat.go +++ /dev/null @@ -1,94 +0,0 @@ -package auth - -import ( - "context" - "fmt" - "strings" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/config" - coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" -) - -// LoadVertexCompatCredentials loads Vertex AI-compatible API key credentials from config -// into the auth manager. Each entry becomes an in-memory auth entry with API key and -// custom headers stored in attributes. -func LoadVertexCompatCredentials(ctx context.Context, cfg *config.Config, mgr *coreauth.Manager) error { - if cfg == nil || mgr == nil { - return nil - } - if len(cfg.VertexCompatAPIKey) == 0 { - return nil - } - - for i, entry := range cfg.VertexCompatAPIKey { - apiKey := strings.TrimSpace(entry.APIKey) - baseURL := strings.TrimSpace(entry.BaseURL) - - if apiKey == "" || baseURL == "" { - continue - } - - // Create unique ID from index and base URL - id := fmt.Sprintf("vertex-compat-%d-%s", i, sanitizeForID(baseURL)) - label := fmt.Sprintf("Vertex-Compat (%s)", extractDomain(baseURL)) - - // Build attributes map - attrs := make(map[string]string) - attrs["api_key"] = apiKey - attrs["base_url"] = baseURL - - // Add proxy URL if specified - if entry.ProxyURL != "" { - attrs["proxy_url"] = strings.TrimSpace(entry.ProxyURL) - } - - // Copy custom headers to attributes with "header_" prefix - for k, v := range entry.Headers { - headerKey := "header_" + strings.ToLower(strings.TrimSpace(k)) - attrs[headerKey] = strings.TrimSpace(v) - } - - auth := &coreauth.Auth{ - ID: id, - Provider: "vertex-compat", - Label: label, - Attributes: attrs, - Metadata: make(map[string]any), - } - - if _, err := mgr.Register(ctx, auth); err != nil { - return fmt.Errorf("failed to register vertex-compat credential %d: %w", i, err) - } - } - - return nil -} - -// sanitizeForID creates a safe ID component from a URL. -func sanitizeForID(url string) string { - // Remove https:// and http:// - clean := strings.TrimPrefix(url, "https://") - clean = strings.TrimPrefix(clean, "http://") - // Replace non-alphanumeric with dash - clean = strings.Map(func(r rune) rune { - if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') { - return r - } - return '-' - }, clean) - // Limit length - if len(clean) > 30 { - clean = clean[:30] - } - return strings.Trim(clean, "-") -} - -// extractDomain extracts the domain from a URL for display purposes. -func extractDomain(url string) string { - clean := strings.TrimPrefix(url, "https://") - clean = strings.TrimPrefix(clean, "http://") - if idx := strings.Index(clean, "/"); idx > 0 { - clean = clean[:idx] - } - return clean -} From bcff0533734069ee1bb8582314fc3a044d47075c Mon Sep 17 00:00:00 2001 From: Aero Date: Sun, 30 Nov 2025 23:18:02 +0800 Subject: [PATCH 09/10] chore: update gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index ef2d935a..9e730c98 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ pgstore/* gitstore/* objectstore/* static/* +refs/* # Authentication data auths/* @@ -30,3 +31,7 @@ GEMINI.md .vscode/* .claude/* .serena/* + +# macOS +.DS_Store +._* From 0fdce3e110a538a13cef6c9ea3a4353f323d4bd7 Mon Sep 17 00:00:00 2001 From: Aero Date: Sun, 30 Nov 2025 23:12:13 +0800 Subject: [PATCH 10/10] feat: consolidate Vertex AI compatibility with API key support in Gemini Vertex executor --- .../executor/gemini_vertex_executor.go | 481 ++++++++++++++++-- .../executor/vertex_compat_executor.go | 375 -------------- sdk/cliproxy/service.go | 2 +- 3 files changed, 426 insertions(+), 432 deletions(-) delete mode 100644 internal/runtime/executor/vertex_compat_executor.go diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index bd4242a1..eeb7356e 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -44,6 +44,22 @@ func NewGeminiVertexExecutor(cfg *config.Config) *GeminiVertexExecutor { // Identifier returns provider key for manager routing. func (e *GeminiVertexExecutor) Identifier() string { return "vertex" } +// GeminiVertexCompatExecutor is a thin wrapper around GeminiVertexExecutor +// that provides the correct identifier for vertex-compat routing. +type GeminiVertexCompatExecutor struct { + *GeminiVertexExecutor +} + +// NewGeminiVertexCompatExecutor constructs the Vertex-compatible executor. +func NewGeminiVertexCompatExecutor(cfg *config.Config) *GeminiVertexCompatExecutor { + return &GeminiVertexCompatExecutor{ + GeminiVertexExecutor: NewGeminiVertexExecutor(cfg), + } +} + +// Identifier returns provider key for manager routing. +func (e *GeminiVertexCompatExecutor) Identifier() string { return "vertex-compat" } + // PrepareRequest is a no-op for Vertex. func (e *GeminiVertexExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error { return nil @@ -51,11 +67,238 @@ func (e *GeminiVertexExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.A // Execute handles non-streaming requests. func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - projectID, location, saJSON, errCreds := vertexCreds(auth) - if errCreds != nil { - return resp, errCreds + // Try API key authentication first + apiKey, baseURL := vertexAPICreds(auth) + + // If no API key found, fall back to service account authentication + if apiKey == "" { + projectID, location, saJSON, errCreds := vertexCreds(auth) + if errCreds != nil { + return resp, errCreds + } + return e.executeWithServiceAccount(ctx, auth, req, opts, projectID, location, saJSON) + } + + // Use API key authentication + return e.executeWithAPIKey(ctx, auth, req, opts, apiKey, baseURL) +} + +// ExecuteStream handles SSE streaming for Vertex. +func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + // Try API key authentication first + apiKey, baseURL := vertexAPICreds(auth) + + // If no API key found, fall back to service account authentication + if apiKey == "" { + projectID, location, saJSON, errCreds := vertexCreds(auth) + if errCreds != nil { + return nil, errCreds + } + return e.executeStreamWithServiceAccount(ctx, auth, req, opts, projectID, location, saJSON) + } + + // Use API key authentication + return e.executeStreamWithAPIKey(ctx, auth, req, opts, apiKey, baseURL) +} + +// CountTokens calls Vertex countTokens endpoint. +func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + // Try API key authentication first + apiKey, baseURL := vertexAPICreds(auth) + + // If no API key found, fall back to service account authentication + if apiKey == "" { + projectID, location, saJSON, errCreds := vertexCreds(auth) + if errCreds != nil { + return cliproxyexecutor.Response{}, errCreds + } + return e.countTokensWithServiceAccount(ctx, auth, req, opts, projectID, location, saJSON) + } + + // Use API key authentication + return e.countTokensWithAPIKey(ctx, auth, req, opts, apiKey, baseURL) +} + +// countTokensWithServiceAccount handles token counting using service account credentials. +func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) { + from := opts.SourceFormat + to := sdktranslator.FromString("gemini") + translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } + translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) + } + translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) + translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) + respCtx := context.WithValue(ctx, "alt", opts.Alt) + translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") + translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") + translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") + + baseURL := vertexBaseURL(location) + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens") + + httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq)) + if errNewReq != nil { + return cliproxyexecutor.Response{}, errNewReq + } + httpReq.Header.Set("Content-Type", "application/json") + if token, errTok := vertexAccessToken(ctx, e.cfg, auth, saJSON); errTok == nil && token != "" { + httpReq.Header.Set("Authorization", "Bearer "+token) + } else if errTok != nil { + log.Errorf("vertex executor: access token error: %v", errTok) + return cliproxyexecutor.Response{}, statusErr{code: 500, msg: "internal server error"} + } + applyGeminiHeaders(httpReq, auth) + + var authID, authLabel, authType, authValue string + if auth != nil { + authID = auth.ID + authLabel = auth.Label + authType, authValue = auth.AccountInfo() + } + recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + URL: url, + Method: http.MethodPost, + Headers: httpReq.Header.Clone(), + Body: translatedReq, + Provider: e.Identifier(), + AuthID: authID, + AuthLabel: authLabel, + AuthType: authType, + AuthValue: authValue, + }) + + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpResp, errDo := httpClient.Do(httpReq) + if errDo != nil { + recordAPIResponseError(ctx, e.cfg, errDo) + return cliproxyexecutor.Response{}, errDo + } + defer func() { + if errClose := httpResp.Body.Close(); errClose != nil { + log.Errorf("vertex executor: close response body error: %v", errClose) + } + }() + recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { + b, _ := io.ReadAll(httpResp.Body) + appendAPIResponseChunk(ctx, e.cfg, b) + log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)} } + data, errRead := io.ReadAll(httpResp.Body) + if errRead != nil { + recordAPIResponseError(ctx, e.cfg, errRead) + return cliproxyexecutor.Response{}, errRead + } + appendAPIResponseChunk(ctx, e.cfg, data) + if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { + log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) + return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)} + } + count := gjson.GetBytes(data, "totalTokens").Int() + out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data) + return cliproxyexecutor.Response{Payload: []byte(out)}, nil +} + +// countTokensWithAPIKey handles token counting using API key credentials. +func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) { + from := opts.SourceFormat + to := sdktranslator.FromString("gemini") + translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } + translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) + } + translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) + translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) + respCtx := context.WithValue(ctx, "alt", opts.Alt) + translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") + translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") + translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") + + // For API key auth, use simpler URL format without project/location + if baseURL == "" { + baseURL = "https://generativelanguage.googleapis.com" + } + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "countTokens") + + httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq)) + if errNewReq != nil { + return cliproxyexecutor.Response{}, errNewReq + } + httpReq.Header.Set("Content-Type", "application/json") + if apiKey != "" { + httpReq.Header.Set("x-goog-api-key", apiKey) + } + applyGeminiHeaders(httpReq, auth) + + var authID, authLabel, authType, authValue string + if auth != nil { + authID = auth.ID + authLabel = auth.Label + authType, authValue = auth.AccountInfo() + } + recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + URL: url, + Method: http.MethodPost, + Headers: httpReq.Header.Clone(), + Body: translatedReq, + Provider: e.Identifier(), + AuthID: authID, + AuthLabel: authLabel, + AuthType: authType, + AuthValue: authValue, + }) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpResp, errDo := httpClient.Do(httpReq) + if errDo != nil { + recordAPIResponseError(ctx, e.cfg, errDo) + return cliproxyexecutor.Response{}, errDo + } + defer func() { + if errClose := httpResp.Body.Close(); errClose != nil { + log.Errorf("vertex executor: close response body error: %v", errClose) + } + }() + recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { + b, _ := io.ReadAll(httpResp.Body) + appendAPIResponseChunk(ctx, e.cfg, b) + log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)} + } + data, errRead := io.ReadAll(httpResp.Body) + if errRead != nil { + recordAPIResponseError(ctx, e.cfg, errRead) + return cliproxyexecutor.Response{}, errRead + } + appendAPIResponseChunk(ctx, e.cfg, data) + if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { + log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) + return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)} + } + count := gjson.GetBytes(data, "totalTokens").Int() + out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data) + return cliproxyexecutor.Response{Payload: []byte(out)}, nil +} + +// Refresh is a no-op for service account based credentials. +func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) { + return auth, nil +} + +// executeWithServiceAccount handles authentication using service account credentials. +// This method contains the original service account authentication logic. +func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (resp cliproxyexecutor.Response, err error) { reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) @@ -149,13 +392,105 @@ func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.A return resp, nil } -// ExecuteStream handles SSE streaming for Vertex. -func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - projectID, location, saJSON, errCreds := vertexCreds(auth) - if errCreds != nil { - return nil, errCreds +// executeWithAPIKey handles authentication using API key credentials. +// This method follows the vertex-compat pattern for API key authentication. +func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (resp cliproxyexecutor.Response, err error) { + reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + defer reporter.trackFailure(ctx, &err) + + from := opts.SourceFormat + to := sdktranslator.FromString("gemini") + body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } + body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) + } + body = util.StripThinkingConfigIfUnsupported(req.Model, body) + body = fixGeminiImageAspectRatio(req.Model, body) + body = applyPayloadConfig(e.cfg, req.Model, body) + + action := "generateContent" + if req.Metadata != nil { + if a, _ := req.Metadata["action"].(string); a == "countTokens" { + action = "countTokens" + } + } + + // For API key auth, use simpler URL format without project/location + if baseURL == "" { + baseURL = "https://generativelanguage.googleapis.com" + } + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, action) + if opts.Alt != "" && action != "countTokens" { + url = url + fmt.Sprintf("?$alt=%s", opts.Alt) + } + body, _ = sjson.DeleteBytes(body, "session_id") + + httpReq, errNewReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) + if errNewReq != nil { + return resp, errNewReq + } + httpReq.Header.Set("Content-Type", "application/json") + if apiKey != "" { + httpReq.Header.Set("x-goog-api-key", apiKey) + } + applyGeminiHeaders(httpReq, auth) + + var authID, authLabel, authType, authValue string + if auth != nil { + authID = auth.ID + authLabel = auth.Label + authType, authValue = auth.AccountInfo() + } + recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + URL: url, + Method: http.MethodPost, + Headers: httpReq.Header.Clone(), + Body: body, + Provider: e.Identifier(), + AuthID: authID, + AuthLabel: authLabel, + AuthType: authType, + AuthValue: authValue, + }) + + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpResp, errDo := httpClient.Do(httpReq) + if errDo != nil { + recordAPIResponseError(ctx, e.cfg, errDo) + return resp, errDo } + defer func() { + if errClose := httpResp.Body.Close(); errClose != nil { + log.Errorf("vertex executor: close response body error: %v", errClose) + } + }() + recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { + b, _ := io.ReadAll(httpResp.Body) + appendAPIResponseChunk(ctx, e.cfg, b) + log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + err = statusErr{code: httpResp.StatusCode, msg: string(b)} + return resp, err + } + data, errRead := io.ReadAll(httpResp.Body) + if errRead != nil { + recordAPIResponseError(ctx, e.cfg, errRead) + return resp, errRead + } + appendAPIResponseChunk(ctx, e.cfg, data) + reporter.publish(ctx, parseGeminiUsage(data)) + var param any + out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m) + resp = cliproxyexecutor.Response{Payload: []byte(out)} + return resp, nil +} +// executeStreamWithServiceAccount handles streaming authentication using service account credentials. +func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (stream <-chan cliproxyexecutor.StreamChunk, err error) { reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) @@ -266,42 +601,44 @@ func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxy return stream, nil } -// CountTokens calls Vertex countTokens endpoint. -func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - projectID, location, saJSON, errCreds := vertexCreds(auth) - if errCreds != nil { - return cliproxyexecutor.Response{}, errCreds - } +// executeStreamWithAPIKey handles streaming authentication using API key credentials. +func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + defer reporter.trackFailure(ctx, &err) + from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { if budgetOverride != nil { norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) budgetOverride = &norm } - translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) + body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) - respCtx := context.WithValue(ctx, "alt", opts.Alt) - translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") - translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") - translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") + body = util.StripThinkingConfigIfUnsupported(req.Model, body) + body = fixGeminiImageAspectRatio(req.Model, body) + body = applyPayloadConfig(e.cfg, req.Model, body) - baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens") + // For API key auth, use simpler URL format without project/location + if baseURL == "" { + baseURL = "https://generativelanguage.googleapis.com" + } + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "streamGenerateContent") + if opts.Alt == "" { + url = url + "?alt=sse" + } else { + url = url + fmt.Sprintf("?$alt=%s", opts.Alt) + } + body, _ = sjson.DeleteBytes(body, "session_id") - httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq)) + httpReq, errNewReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) if errNewReq != nil { - return cliproxyexecutor.Response{}, errNewReq + return nil, errNewReq } httpReq.Header.Set("Content-Type", "application/json") - if token, errTok := vertexAccessToken(ctx, e.cfg, auth, saJSON); errTok == nil && token != "" { - httpReq.Header.Set("Authorization", "Bearer "+token) - } else if errTok != nil { - log.Errorf("vertex executor: access token error: %v", errTok) - return cliproxyexecutor.Response{}, statusErr{code: 500, msg: "internal server error"} + if apiKey != "" { + httpReq.Header.Set("x-goog-api-key", apiKey) } applyGeminiHeaders(httpReq, auth) @@ -315,7 +652,7 @@ func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyau URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), - Body: translatedReq, + Body: body, Provider: e.Identifier(), AuthID: authID, AuthLabel: authLabel, @@ -327,38 +664,53 @@ func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyau httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { recordAPIResponseError(ctx, e.cfg, errDo) - return cliproxyexecutor.Response{}, errDo + return nil, errDo } - defer func() { - if errClose := httpResp.Body.Close(); errClose != nil { - log.Errorf("vertex executor: close response body error: %v", errClose) - } - }() recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) appendAPIResponseChunk(ctx, e.cfg, b) log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) - return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)} - } - data, errRead := io.ReadAll(httpResp.Body) - if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) - return cliproxyexecutor.Response{}, errRead - } - appendAPIResponseChunk(ctx, e.cfg, data) - if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) - return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)} + if errClose := httpResp.Body.Close(); errClose != nil { + log.Errorf("vertex executor: close response body error: %v", errClose) + } + return nil, statusErr{code: httpResp.StatusCode, msg: string(b)} } - count := gjson.GetBytes(data, "totalTokens").Int() - out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data) - return cliproxyexecutor.Response{Payload: []byte(out)}, nil -} -// Refresh is a no-op for service account based credentials. -func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) { - return auth, nil + out := make(chan cliproxyexecutor.StreamChunk) + stream = out + go func() { + defer close(out) + defer func() { + if errClose := httpResp.Body.Close(); errClose != nil { + log.Errorf("vertex executor: close response body error: %v", errClose) + } + }() + scanner := bufio.NewScanner(httpResp.Body) + scanner.Buffer(nil, 20_971_520) + var param any + for scanner.Scan() { + line := scanner.Bytes() + appendAPIResponseChunk(ctx, e.cfg, line) + if detail, ok := parseGeminiStreamUsage(line); ok { + reporter.publish(ctx, detail) + } + lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), ¶m) + for i := range lines { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])} + } + } + lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, []byte("[DONE]"), ¶m) + for i := range lines { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])} + } + if errScan := scanner.Err(); errScan != nil { + recordAPIResponseError(ctx, e.cfg, errScan) + reporter.publishFailure(ctx) + out <- cliproxyexecutor.StreamChunk{Err: errScan} + } + }() + return stream, nil } // vertexCreds extracts project, location and raw service account JSON from auth metadata. @@ -401,6 +753,23 @@ func vertexCreds(a *cliproxyauth.Auth) (projectID, location string, serviceAccou return projectID, location, saJSON, nil } +// vertexAPICreds extracts API key and base URL from auth attributes following the claudeCreds pattern. +func vertexAPICreds(a *cliproxyauth.Auth) (apiKey, baseURL string) { + if a == nil { + return "", "" + } + if a.Attributes != nil { + apiKey = a.Attributes["api_key"] + baseURL = a.Attributes["base_url"] + } + if apiKey == "" && a.Metadata != nil { + if v, ok := a.Metadata["access_token"].(string); ok { + apiKey = v + } + } + return +} + func vertexBaseURL(location string) string { loc := strings.TrimSpace(location) if loc == "" { diff --git a/internal/runtime/executor/vertex_compat_executor.go b/internal/runtime/executor/vertex_compat_executor.go deleted file mode 100644 index b513f94d..00000000 --- a/internal/runtime/executor/vertex_compat_executor.go +++ /dev/null @@ -1,375 +0,0 @@ -// Package executor provides runtime execution capabilities for various AI service providers. -package executor - -import ( - "bufio" - "bytes" - "context" - "fmt" - "io" - "net/http" - "strings" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/config" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" - cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" - cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" - sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" - log "github.com/sirupsen/logrus" - "github.com/tidwall/gjson" - "github.com/tidwall/sjson" -) - -const ( - // vertexCompatAPIVersion is the API version for Vertex-compatible endpoints. - vertexCompatAPIVersion = "v1" -) - -// VertexCompatExecutor is a stateless executor for Vertex AI-compatible APIs -// that use Vertex-style paths (/publishers/google/models/{model}:action) -// but authenticate with simple API keys instead of Google Cloud service accounts. -// -// This executor supports third-party providers like zenmux.ai that mimic -// Vertex AI's URL structure while using simpler authentication mechanisms. -type VertexCompatExecutor struct { - cfg *config.Config -} - -// NewVertexCompatExecutor creates a new Vertex-compatible executor instance. -func NewVertexCompatExecutor(cfg *config.Config) *VertexCompatExecutor { - return &VertexCompatExecutor{cfg: cfg} -} - -// Identifier returns the executor identifier for routing. -func (e *VertexCompatExecutor) Identifier() string { return "vertex-compat" } - -// PrepareRequest is a no-op for API key based authentication. -func (e *VertexCompatExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error { - return nil -} - -// Execute performs a non-streaming request to the Vertex-compatible API. -func (e *VertexCompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - apiKey, baseURL := vertexCompatCreds(auth) - - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) - defer reporter.trackFailure(ctx, &err) - - from := opts.SourceFormat - to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - - // Apply thinking config if supported - if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) - - action := "generateContent" - if req.Metadata != nil { - if a, _ := req.Metadata["action"].(string); a == "countTokens" { - action = "countTokens" - } - } - - // Construct Vertex-style URL: {baseURL}/v1/publishers/google/models/{model}:action - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexCompatAPIVersion, req.Model, action) - if opts.Alt != "" && action != "countTokens" { - url = url + fmt.Sprintf("?$alt=%s", opts.Alt) - } - body, _ = sjson.DeleteBytes(body, "session_id") - - httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) - if err != nil { - return resp, err - } - httpReq.Header.Set("Content-Type", "application/json") - if apiKey != "" { - httpReq.Header.Set("x-goog-api-key", apiKey) - } - applyVertexCompatHeaders(httpReq, auth) - - var authID, authLabel, authType, authValue string - if auth != nil { - authID = auth.ID - authLabel = auth.Label - authType, authValue = auth.AccountInfo() - } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ - URL: url, - Method: http.MethodPost, - Headers: httpReq.Header.Clone(), - Body: body, - Provider: e.Identifier(), - AuthID: authID, - AuthLabel: authLabel, - AuthType: authType, - AuthValue: authValue, - }) - - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) - httpResp, err := httpClient.Do(httpReq) - if err != nil { - recordAPIResponseError(ctx, e.cfg, err) - return resp, err - } - defer func() { - if errClose := httpResp.Body.Close(); errClose != nil { - log.Errorf("vertex-compat executor: close response body error: %v", errClose) - } - }() - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) - - if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) - err = statusErr{code: httpResp.StatusCode, msg: string(b)} - return resp, err - } - - data, err := io.ReadAll(httpResp.Body) - if err != nil { - recordAPIResponseError(ctx, e.cfg, err) - return resp, err - } - appendAPIResponseChunk(ctx, e.cfg, data) - reporter.publish(ctx, parseGeminiUsage(data)) - - var param any - out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m) - resp = cliproxyexecutor.Response{Payload: []byte(out)} - return resp, nil -} - -// ExecuteStream handles SSE streaming for Vertex-compatible APIs. -func (e *VertexCompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - apiKey, baseURL := vertexCompatCreds(auth) - - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) - defer reporter.trackFailure(ctx, &err) - - from := opts.SourceFormat - to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - - // Apply thinking config if supported - if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) - - // Construct Vertex-style streaming URL - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexCompatAPIVersion, req.Model, "streamGenerateContent") - if opts.Alt == "" { - url = url + "?alt=sse" - } else { - url = url + fmt.Sprintf("?$alt=%s", opts.Alt) - } - body, _ = sjson.DeleteBytes(body, "session_id") - - httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) - if err != nil { - return nil, err - } - httpReq.Header.Set("Content-Type", "application/json") - if apiKey != "" { - httpReq.Header.Set("x-goog-api-key", apiKey) - } - applyVertexCompatHeaders(httpReq, auth) - - var authID, authLabel, authType, authValue string - if auth != nil { - authID = auth.ID - authLabel = auth.Label - authType, authValue = auth.AccountInfo() - } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ - URL: url, - Method: http.MethodPost, - Headers: httpReq.Header.Clone(), - Body: body, - Provider: e.Identifier(), - AuthID: authID, - AuthLabel: authLabel, - AuthType: authType, - AuthValue: authValue, - }) - - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) - httpResp, err := httpClient.Do(httpReq) - if err != nil { - recordAPIResponseError(ctx, e.cfg, err) - return nil, err - } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) - - if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) - if errClose := httpResp.Body.Close(); errClose != nil { - log.Errorf("vertex-compat executor: close response body error: %v", errClose) - } - return nil, statusErr{code: httpResp.StatusCode, msg: string(b)} - } - - out := make(chan cliproxyexecutor.StreamChunk) - stream = out - go func() { - defer close(out) - defer func() { - if errClose := httpResp.Body.Close(); errClose != nil { - log.Errorf("vertex-compat executor: close response body error: %v", errClose) - } - }() - scanner := bufio.NewScanner(httpResp.Body) - scanner.Buffer(nil, 20_971_520) - var param any - for scanner.Scan() { - line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) - filtered := FilterSSEUsageMetadata(line) - payload := jsonPayload(filtered) - if len(payload) == 0 { - continue - } - if detail, ok := parseGeminiStreamUsage(payload); ok { - reporter.publish(ctx, detail) - } - lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(payload), ¶m) - for i := range lines { - out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])} - } - } - lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone([]byte("[DONE]")), ¶m) - for i := range lines { - out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])} - } - if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) - out <- cliproxyexecutor.StreamChunk{Err: errScan} - } - }() - return stream, nil -} - -// CountTokens calls the Vertex-compatible countTokens endpoint. -func (e *VertexCompatExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - apiKey, baseURL := vertexCompatCreds(auth) - - from := opts.SourceFormat - to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - - if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) - budgetOverride = &norm - } - translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) - } - translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) - - respCtx := ctx - if opts.Alt != "" { - respCtx = context.WithValue(ctx, "alt", opts.Alt) - } - translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") - translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") - translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") - - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexCompatAPIVersion, req.Model, "countTokens") - - httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translatedReq)) - if err != nil { - return cliproxyexecutor.Response{}, err - } - httpReq.Header.Set("Content-Type", "application/json") - if apiKey != "" { - httpReq.Header.Set("x-goog-api-key", apiKey) - } - applyVertexCompatHeaders(httpReq, auth) - - var authID, authLabel, authType, authValue string - if auth != nil { - authID = auth.ID - authLabel = auth.Label - authType, authValue = auth.AccountInfo() - } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ - URL: url, - Method: http.MethodPost, - Headers: httpReq.Header.Clone(), - Body: translatedReq, - Provider: e.Identifier(), - AuthID: authID, - AuthLabel: authLabel, - AuthType: authType, - AuthValue: authValue, - }) - - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) - resp, err := httpClient.Do(httpReq) - if err != nil { - recordAPIResponseError(ctx, e.cfg, err) - return cliproxyexecutor.Response{}, err - } - defer func() { _ = resp.Body.Close() }() - recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone()) - - data, err := io.ReadAll(resp.Body) - if err != nil { - recordAPIResponseError(ctx, e.cfg, err) - return cliproxyexecutor.Response{}, err - } - appendAPIResponseChunk(ctx, e.cfg, data) - - if resp.StatusCode < 200 || resp.StatusCode >= 300 { - log.Debugf("request error, error status: %d, error body: %s", resp.StatusCode, summarizeErrorBody(resp.Header.Get("Content-Type"), data)) - return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(data)} - } - - count := gjson.GetBytes(data, "totalTokens").Int() - translated := sdktranslator.TranslateTokenCount(respCtx, to, from, count, data) - return cliproxyexecutor.Response{Payload: []byte(translated)}, nil -} - -// Refresh is a no-op for API key based authentication. -func (e *VertexCompatExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) { - return auth, nil -} - -// vertexCompatCreds extracts API key and base URL from auth attributes. -func vertexCompatCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) { - if a == nil || a.Attributes == nil { - return "", "" - } - if v := a.Attributes["api_key"]; v != "" { - apiKey = v - } - if v := a.Attributes["base_url"]; v != "" { - baseURL = strings.TrimRight(strings.TrimSpace(v), "/") - } - return -} - -// applyVertexCompatHeaders applies custom headers from auth attributes. -func applyVertexCompatHeaders(req *http.Request, auth *cliproxyauth.Auth) { - var attrs map[string]string - if auth != nil { - attrs = auth.Attributes - } - util.ApplyCustomHeadersFromAttrs(req, attrs) -} diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index facd24aa..f0b6bf53 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -363,7 +363,7 @@ func (s *Service) ensureExecutorsForAuth(a *coreauth.Auth) { case "vertex": s.coreManager.RegisterExecutor(executor.NewGeminiVertexExecutor(s.cfg)) case "vertex-compat": - s.coreManager.RegisterExecutor(executor.NewVertexCompatExecutor(s.cfg)) + s.coreManager.RegisterExecutor(executor.NewGeminiVertexCompatExecutor(s.cfg)) case "gemini-cli": s.coreManager.RegisterExecutor(executor.NewGeminiCLIExecutor(s.cfg)) case "aistudio":