From 70dff41b708b5fe493b7c2a6b03325d6e49563d5 Mon Sep 17 00:00:00 2001 From: Awuqing <3184394176@qq.com> Date: Wed, 1 Apr 2026 18:35:26 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D:=20=E4=B8=8A=E4=BC=A0?= =?UTF-8?q?=E6=93=8D=E4=BD=9C=E7=BA=A7=E9=87=8D=E8=AF=95=EF=BC=8C=E8=A7=A3?= =?UTF-8?q?=E5=86=B3=20Google=20Drive=20=E7=AD=89=E8=BF=9C=E7=AB=AF?= =?UTF-8?q?=E4=B8=B4=E6=97=B6=E6=95=85=E9=9A=9C=E5=AF=BC=E8=87=B4=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E5=A4=87=E4=BB=BD=E8=BF=9E=E7=BB=AD=E5=A4=B1=E8=B4=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 问题:rclone 底层重试只覆盖单个 HTTP 请求,但 Google API 的 502/timeout 等临时故障会导致整个上传操作失败,自动触发的备份任务连续失败。 修复:在 provider.Upload 外层增加操作级重试(最多尝试 3 次,第 2、3 次尝试前分别等待 40s、90s,即 attempt×attempt×10s 的递增退避), 每次重试重新打开文件并重建 reader 链。重试过程通过日志流实时反馈。 --- .../service/backup_execution_service.go | 61 +++++++++++-------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/server/internal/service/backup_execution_service.go b/server/internal/service/backup_execution_service.go index 8c0e335..a514547 100644 --- a/server/internal/service/backup_execution_service.go +++ b/server/internal/service/backup_execution_service.go @@ -363,33 +363,46 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba logger.Warnf("存储目标 %s 创建客户端失败:%v", targetName, resolveErr) return } - artifact, openErr := os.Open(finalPath) - if openErr != nil { - uploadResults[index] = StorageUploadResultItem{StorageTargetID: targetID, StorageTargetName: targetName, Status: "failed", Error: openErr.Error()} - logger.Warnf("存储目标 %s 打开备份文件失败:%v", targetName, openErr) - return - } - defer artifact.Close() logger.Infof("开始上传备份到存储目标:%s", targetName) - // hashingReader: 上传过程中同步计算字节数 + SHA-256,单次读取零额外 I/O - hr := newHashingReader(artifact) - // progressReader: 包装 hashingReader,通过 LogHub 推送实时上传进度 - pr := newProgressReader(hr, fileSize, func(bytesRead int64, speedBps float64) { - percent := float64(0) - if fileSize > 0 { - percent = float64(bytesRead) / 
float64(fileSize) * 100 + // 上传级重试:最多尝试 3 次,第 2、3 次尝试前分别等待 40s、90s(attempt×attempt×10s) + maxAttempts := 3 + var lastUploadErr error + var hr *hashingReader + for attempt := 1; attempt <= maxAttempts; attempt++ { + if attempt > 1 { + backoff := time.Duration(attempt*attempt) * 10 * time.Second + logger.Warnf("存储目标 %s 第 %d 次重试(等待 %v):%v", targetName, attempt, backoff, lastUploadErr) + time.Sleep(backoff) + } + artifact, openErr := os.Open(finalPath) + if openErr != nil { + uploadResults[index] = StorageUploadResultItem{StorageTargetID: targetID, StorageTargetName: targetName, Status: "failed", Error: openErr.Error()} + logger.Warnf("存储目标 %s 打开备份文件失败:%v", targetName, openErr) + return } - s.logHub.AppendProgress(recordID, backup.ProgressInfo{ - BytesSent: bytesRead, - TotalBytes: fileSize, - Percent: percent, - SpeedBps: speedBps, - TargetName: targetName, + hr = newHashingReader(artifact) + pr := newProgressReader(hr, fileSize, func(bytesRead int64, speedBps float64) { + percent := float64(0) + if fileSize > 0 { + percent = float64(bytesRead) / float64(fileSize) * 100 + } + s.logHub.AppendProgress(recordID, backup.ProgressInfo{ + BytesSent: bytesRead, + TotalBytes: fileSize, + Percent: percent, + SpeedBps: speedBps, + TargetName: targetName, + }) }) - }) - if uploadErr := provider.Upload(ctx, storagePath, pr, fileSize, map[string]string{"taskId": fmt.Sprintf("%d", task.ID), "recordId": fmt.Sprintf("%d", recordID)}); uploadErr != nil { - uploadResults[index] = StorageUploadResultItem{StorageTargetID: targetID, StorageTargetName: targetName, Status: "failed", Error: uploadErr.Error()} - logger.Warnf("存储目标 %s 上传失败:%v", targetName, uploadErr) + lastUploadErr = provider.Upload(ctx, storagePath, pr, fileSize, map[string]string{"taskId": fmt.Sprintf("%d", task.ID), "recordId": fmt.Sprintf("%d", recordID)}) + artifact.Close() + if lastUploadErr == nil { + break + } + } + if lastUploadErr != nil { + uploadResults[index] = StorageUploadResultItem{StorageTargetID: targetID, StorageTargetName: 
targetName, Status: "failed", Error: lastUploadErr.Error()} + logger.Warnf("存储目标 %s 上传失败(已重试 %d 次):%v", targetName, maxAttempts, lastUploadErr) return } // 完整性校验:对比实际传输字节数