From 42313cbfee10b7def4947f5f03ad13f7a3f769f8 Mon Sep 17 00:00:00 2001 From: doubao-backup Date: Thu, 23 Apr 2026 13:03:01 +0800 Subject: [PATCH] fix: adapt to ChatGPT API v2 metadata format + add Turnstile solver support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem ChatGPT upstream API changed the metadata format for image generation tool messages: - Old format: `metadata.async_task_type = "image_gen"` - New format: `metadata.image_gen_title = "..."` (final) or `metadata.async_source = "..."` (preview) This caused `ExtractImageToolMsgs` to match zero tool messages, making poll always timeout with no images returned. Additionally, the upstream now requires Turnstile challenge solving for the two-step chat-requirements flow (prepare → finalize). ## Changes ### Core fix: metadata compatibility (image.go) - `ExtractImageToolMsgs` now matches tool messages by any of: `async_task_type=image_gen` (old), `image_gen_title` (new final), or `async_source` (new preview) ### Turnstile solver support - New `TurnstileSolver` interface signature: `Solve(ctx, dx, token)` adds the `token` (p_value) parameter needed for bytecode decryption - New `HTTPTurnstileSolver`: delegates to an external HTTP service - New config field: `upstream.turnstile_solver_url` - Wiring in `runner.go` and `main.go` - `ChatRequirementsPrepareResp.PToken` field to pass the requirements_token through to the solver ### Tuning - Poll interval: 3s → 15s (avoids 429 rate limiting from upstream) - Consecutive 429 tolerance: 3 → 10 - PerAttemptTimeout: 6min → 10min (image gen can take 3-5 min) - PollMaxWait: 300s → 480s --- cmd/server/main.go | 11 +++- internal/config/config.go | 1 + internal/image/runner.go | 24 +++++---- internal/upstream/chatgpt/client.go | 4 +- .../upstream/chatgpt/http_turnstile_solver.go | 50 +++++++++++++++++++ internal/upstream/chatgpt/image.go | 11 ++-- internal/upstream/chatgpt/pow.go | 2 +- 7 files changed, 85 insertions(+), 18 
deletions(-) create mode 100644 internal/upstream/chatgpt/http_turnstile_solver.go diff --git a/cmd/server/main.go b/cmd/server/main.go index 7dda641..de3a044 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -21,7 +21,8 @@ import ( "github.com/432539/gpt2api/internal/config" "github.com/432539/gpt2api/internal/db" "github.com/432539/gpt2api/internal/gateway" "github.com/432539/gpt2api/internal/image" + "github.com/432539/gpt2api/internal/upstream/chatgpt" modelpkg "github.com/432539/gpt2api/internal/model" "github.com/432539/gpt2api/internal/proxy" gwratelimit "github.com/432539/gpt2api/internal/ratelimit" @@ -142,7 +143,13 @@ func main() { } imageDAO := image.NewDAO(sqldb) - imageRunner := image.NewRunner(sched, imageDAO) + // Turnstile solver (optional): set upstream.turnstile_solver_url in config.yaml + var turnstileSolver chatgpt.TurnstileSolver + if cfg.Upstream.TurnstileSolverURL != "" { + turnstileSolver = chatgpt.NewHTTPTurnstileSolver(cfg.Upstream.TurnstileSolverURL) + log.Info("turnstile solver enabled", zap.String("url", cfg.Upstream.TurnstileSolverURL)) + } + imageRunner := image.NewRunner(sched, imageDAO, turnstileSolver) imagesH := &gateway.ImagesHandler{ Handler: gwH, Runner: imageRunner, diff --git a/internal/config/config.go b/internal/config/config.go index 9802881..2711242 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -78,6 +78,7 @@ type UpstreamConfig struct { BaseURL string `mapstructure:"base_url"` RequestTimeoutSec int `mapstructure:"request_timeout_sec"` SSEReadTimeoutSec int `mapstructure:"sse_read_timeout_sec"` + TurnstileSolverURL string `mapstructure:"turnstile_solver_url"` } // BackupConfig 数据库备份配置。 diff --git a/internal/image/runner.go b/internal/image/runner.go index 8734c08..d0541fd 100644 --- a/internal/image/runner.go +++ b/internal/image/runner.go @@ -23,13 +23,14 @@ import ( // IMG2 已正式上线,不再做"灰度命中判定 / preview_only 换账号重试"这些节流操作, // 拿到任意 
file-service / sediment 引用即算成功,以速度和效率优先。 type Runner struct { - sched *scheduler.Scheduler - dao *DAO + sched *scheduler.Scheduler + dao *DAO + solver chatgpt.TurnstileSolver } // NewRunner 构造 Runner。 -func NewRunner(sched *scheduler.Scheduler, dao *DAO) *Runner { - return &Runner{sched: sched, dao: dao} +func NewRunner(sched *scheduler.Scheduler, dao *DAO, solver chatgpt.TurnstileSolver) *Runner { + return &Runner{sched: sched, dao: dao, solver: solver} } // ReferenceImage 是图生图/编辑的一张参考图输入。 @@ -77,10 +78,10 @@ func (r *Runner) Run(ctx context.Context, opt RunOptions) *RunResult { opt.MaxAttempts = 1 } if opt.PerAttemptTimeout <= 0 { - opt.PerAttemptTimeout = 6 * time.Minute + opt.PerAttemptTimeout = 10 * time.Minute } if opt.PollMaxWait <= 0 { - opt.PollMaxWait = 300 * time.Second + opt.PollMaxWait = 480 * time.Second } if opt.UpstreamModel == "" { // 对齐浏览器抓包 + 参考实现:图像走 f/conversation 时 model 字段和 @@ -173,11 +174,12 @@ func (r *Runner) runOnce(ctx context.Context, opt RunOptions, result *RunResult) // 2) 构造上游 client cli, err := chatgpt.New(chatgpt.Options{ - AuthToken: lease.AuthToken, - DeviceID: lease.DeviceID, - SessionID: lease.SessionID, - ProxyURL: lease.ProxyURL, - Cookies: "", // 目前不从 oai_account_cookies 加载,后续 M3+ 再做 + AuthToken: lease.AuthToken, + DeviceID: lease.DeviceID, + SessionID: lease.SessionID, + ProxyURL: lease.ProxyURL, + Cookies: "", // 目前不从 oai_account_cookies 加载,后续 M3+ 再做 + TurnstileSolver: r.solver, }) if err != nil { return false, ErrUnknown, fmt.Errorf("chatgpt client: %w", err) diff --git a/internal/upstream/chatgpt/client.go b/internal/upstream/chatgpt/client.go index 4556879..69bdfc4 100644 --- a/internal/upstream/chatgpt/client.go +++ b/internal/upstream/chatgpt/client.go @@ -346,6 +346,7 @@ func truncatePrefix(s string, n int) string { type ChatRequirementsPrepareResp struct { Persona string `json:"persona"` PrepareToken string `json:"prepare_token"` + PToken string `json:"-"` // local: the p_value / requirements_token used in prepare 
request Turnstile struct { Required bool `json:"required"` DX string `json:"dx"` @@ -384,6 +385,7 @@ func (c *Client) ChatRequirementsPrepare(ctx context.Context) (*ChatRequirements if err := json.Unmarshal(buf, &out); err != nil { return nil, fmt.Errorf("decode chat-requirements/prepare: %w", err) } + out.PToken = reqToken return &out, nil } @@ -476,7 +478,7 @@ func (c *Client) ChatRequirementsV2(ctx context.Context) (*ChatRequirementsResp, if c.opts.TurnstileSolver != nil { sCtx, cancel := context.WithTimeout(ctx, 30*time.Second) defer cancel() - out, solveErr := c.opts.TurnstileSolver.Solve(sCtx, prep.Turnstile.DX) + out, solveErr := c.opts.TurnstileSolver.Solve(sCtx, prep.Turnstile.DX, prep.PToken) if solveErr != nil || out == "" { if logger := loggerL(); logger != nil { logger.Warn("turnstile solver failed, fallback to single-step chat-requirements", diff --git a/internal/upstream/chatgpt/http_turnstile_solver.go b/internal/upstream/chatgpt/http_turnstile_solver.go new file mode 100644 index 0000000..279c2df --- /dev/null +++ b/internal/upstream/chatgpt/http_turnstile_solver.go @@ -0,0 +1,50 @@ +package chatgpt + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" +) + +// HTTPTurnstileSolver calls an external HTTP service to solve Turnstile challenges. +// The service should accept POST /solve with {"dx": "...", "p": "..."} and return +// {"token": "..."}. +type HTTPTurnstileSolver struct { + url string + client *http.Client +} + +// NewHTTPTurnstileSolver creates a solver that delegates to the given URL. 
+func NewHTTPTurnstileSolver(url string) *HTTPTurnstileSolver { + return &HTTPTurnstileSolver{url: url, client: &http.Client{}} +} + +// Solve POSTs dx and token (as "p") to the solver URL and returns the "token" field of the JSON reply. +func (s *HTTPTurnstileSolver) Solve(ctx context.Context, dx string, token string) (string, error) { + body, _ := json.Marshal(map[string]string{"dx": dx, "p": token}) // a map[string]string cannot fail to marshal + req, err := http.NewRequestWithContext(ctx, http.MethodPost, s.url, bytes.NewReader(body)) + if err != nil { + return "", err + } + req.Header.Set("Content-Type", "application/json") + resp, err := s.client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + buf, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("read solver response: %w", err) + } + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("turnstile solver HTTP %d: %s", resp.StatusCode, string(buf)) + } + var result struct{ Token string `json:"token"` } + if err := json.Unmarshal(buf, &result); err != nil { + return "", fmt.Errorf("decode solver response: %w", err) + } + return result.Token, nil +} diff --git a/internal/upstream/chatgpt/image.go b/internal/upstream/chatgpt/image.go index 33d140e..c8d1d00 100644 --- a/internal/upstream/chatgpt/image.go +++ b/internal/upstream/chatgpt/image.go @@ -433,7 +433,12 @@ func ExtractImageToolMsgs(mapping map[string]interface{}) []ImageToolMsg { if s, _ := author["role"].(string); s != "tool" { continue } - if s, _ := meta["async_task_type"].(string); s != "image_gen" { + // Match: old format (async_task_type=image_gen), new final (image_gen_title), + // or new preview (async_source without image_gen_title). 
+ asyncType, _ := meta["async_task_type"].(string) + imgTitle, _ := meta["image_gen_title"].(string) + asyncSrc, _ := meta["async_source"].(string) + if asyncType != "image_gen" && imgTitle == "" && asyncSrc == "" { continue } if s, _ := content["content_type"].(string); s != "multimodal_text" { @@ -525,7 +530,7 @@ func (c *Client) PollConversationForImages(ctx context.Context, convID string, o opt.MaxWait = 300 * time.Second } if opt.Interval <= 0 { - opt.Interval = 3 * time.Second + opt.Interval = 15 * time.Second } baseline := opt.BaselineToolIDs @@ -551,7 +556,7 @@ func (c *Client) PollConversationForImages(ctx context.Context, convID string, o if err != nil { if ue, ok := err.(*UpstreamError); ok && ue.Status == 429 { consecutive429++ - if consecutive429 >= 3 { + if consecutive429 >= 10 { return PollStatusError, nil, nil } sleep(ctx, 10*time.Second) diff --git a/internal/upstream/chatgpt/pow.go b/internal/upstream/chatgpt/pow.go index 624ee2d..d096659 100644 --- a/internal/upstream/chatgpt/pow.go +++ b/internal/upstream/chatgpt/pow.go @@ -41,7 +41,7 @@ import ( // 没有 solver 时,Client.ChatRequirementsV2 会自动回退到老的单步 // chat-requirements 流程(Turnstile=true 直接忽略)。 type TurnstileSolver interface { - Solve(ctx context.Context, dx string) (string, error) + Solve(ctx context.Context, dx string, token string) (string, error) } const (