diff --git a/shortcuts/common/runner.go b/shortcuts/common/runner.go
index 7623c8d4..58189772 100644
--- a/shortcuts/common/runner.go
+++ b/shortcuts/common/runner.go
@@ -283,7 +283,15 @@ func (ctx *RuntimeContext) DoAPIStream(callCtx context.Context, req *larkcore.Ap
 		option.Header = make(http.Header)
 	}
 	if shortcutHeaders := cmdutil.ShortcutHeaderOpts(ctx.ctx); shortcutHeaders != nil {
-		shortcutHeaders(&option)
+		// Collect the shortcut headers on a scratch option and merge them with Add,
+		// so headers already present on option are kept rather than replaced.
+		var shortcutOption larkcore.RequestOption
+		shortcutHeaders(&shortcutOption)
+		for key, values := range shortcutOption.Header {
+			for _, value := range values {
+				option.Header.Add(key, value)
+			}
+		}
 	}
 
 	accessToken, err := ctx.AccessToken()
diff --git a/shortcuts/im/helpers_network_test.go b/shortcuts/im/helpers_network_test.go
index b09b45e0..cdfb139d 100644
--- a/shortcuts/im/helpers_network_test.go
+++ b/shortcuts/im/helpers_network_test.go
@@ -6,6 +6,7 @@ package im
 import (
 	"bytes"
 	"context"
+	"crypto/md5"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -13,6 +14,7 @@ import (
 	"os"
 	"path/filepath"
 	"reflect"
+	"strconv"
 	"strings"
 	"testing"
 	"unsafe"
@@ -289,6 +291,9 @@ func TestDownloadIMResourceToPathSuccess(t *testing.T) {
 	if gotHeaders.Get(cmdutil.HeaderExecutionId) != "exec-123" {
 		t.Fatalf("%s = %q, want %q", cmdutil.HeaderExecutionId, gotHeaders.Get(cmdutil.HeaderExecutionId), "exec-123")
 	}
+	if gotHeaders.Get("Range") != fmt.Sprintf("bytes=0-%d", probeChunkSize-1) {
+		t.Fatalf("Range header = %q, want %q", gotHeaders.Get("Range"), fmt.Sprintf("bytes=0-%d", probeChunkSize-1))
+	}
 }
 
 func TestDownloadIMResourceToPathHTTPErrorBody(t *testing.T) {
@@ -313,6 +318,252 @@ func TestDownloadIMResourceToPathHTTPErrorBody(t *testing.T) {
 	}
 }
 
+func TestDownloadIMResourceToPathRetriesNetworkError(t *testing.T) {
+	attempts := 0
+	payload := []byte("retry success")
+	runtime := newBotShortcutRuntime(t, shortcutRoundTripFunc(func(req *http.Request) (*http.Response, error) {
+		switch {
+		case strings.Contains(req.URL.Path, "tenant_access_token"):
+			return shortcutJSONResponse(200, map[string]interface{}{
+				"code":                0,
+				"tenant_access_token": "tenant-token",
+				"expire":              7200,
+			}), nil
+		case strings.Contains(req.URL.Path, "/open-apis/im/v1/messages/om_retry/resources/file_retry"):
+			attempts++
+			if attempts < 3 {
+				return nil, fmt.Errorf("temporary network failure")
+			}
+			return shortcutRawResponse(200, payload, http.Header{"Content-Type": []string{"application/octet-stream"}}), nil
+		default:
+			return nil, fmt.Errorf("unexpected request: %s", req.URL.String())
+		}
+	}))
+
+	target := filepath.Join(t.TempDir(), "out.bin")
+	_, size, err := downloadIMResourceToPath(context.Background(), runtime, "om_retry", "file_retry", "file", target)
+	if err != nil {
+		t.Fatalf("downloadIMResourceToPath() error = %v", err)
+	}
+	if attempts != 3 {
+		t.Fatalf("download attempts = %d, want 3", attempts)
+	}
+	if size != int64(len(payload)) {
+		t.Fatalf("downloadIMResourceToPath() size = %d, want %d", size, len(payload))
+	}
+}
+
+func TestDownloadIMResourceToPathRetrySecondAttemptSuccess(t *testing.T) {
+	attempts := 0
+	payload := []byte("second retry success")
+	runtime := newBotShortcutRuntime(t, shortcutRoundTripFunc(func(req *http.Request) (*http.Response, error) {
+		switch {
+		case strings.Contains(req.URL.Path, "tenant_access_token"):
+			return shortcutJSONResponse(200, map[string]interface{}{
+				"code":                0,
+				"tenant_access_token": "tenant-token",
+				"expire":              7200,
+			}), nil
+		case strings.Contains(req.URL.Path, "/open-apis/im/v1/messages/om_retry2/resources/file_retry2"):
+			attempts++
+			if attempts < 2 {
+				return nil, fmt.Errorf("temporary network failure")
+			}
+			return shortcutRawResponse(200, payload, http.Header{"Content-Type": []string{"application/octet-stream"}}), nil
+		default:
+			return nil, fmt.Errorf("unexpected request: %s", req.URL.String())
+		}
+	}))
+
+	target := filepath.Join(t.TempDir(), "out.bin")
+	_, size, err := downloadIMResourceToPath(context.Background(), runtime, "om_retry2", "file_retry2", "file", target)
+	if err != nil {
+		t.Fatalf("downloadIMResourceToPath() error = %v", err)
+	}
+	if attempts != 2 {
+		t.Fatalf("download attempts = %d, want 2", attempts)
+	}
+	if size != int64(len(payload)) {
+		t.Fatalf("downloadIMResourceToPath() size = %d, want %d", size, len(payload))
+	}
+}
+
+func TestDownloadIMResourceToPathRetryContextCanceled(t *testing.T) {
+	attempts := 0
+	runtime := newBotShortcutRuntime(t, shortcutRoundTripFunc(func(req *http.Request) (*http.Response, error) {
+		switch {
+		case strings.Contains(req.URL.Path, "tenant_access_token"):
+			return shortcutJSONResponse(200, map[string]interface{}{
+				"code":                0,
+				"tenant_access_token": "tenant-token",
+				"expire":              7200,
+			}), nil
+		case strings.Contains(req.URL.Path, "/open-apis/im/v1/messages/om_cancel/resources/file_cancel"):
+			attempts++
+			return nil, fmt.Errorf("temporary network failure")
+		default:
+			return nil, fmt.Errorf("unexpected request: %s", req.URL.String())
+		}
+	}))
+
+	ctx, cancel := context.WithCancel(context.Background())
+	// Cancel context immediately to trigger context error on first retry
+	cancel()
+
+	target := filepath.Join(t.TempDir(), "out.bin")
+	_, _, err := downloadIMResourceToPath(ctx, runtime, "om_cancel", "file_cancel", "file", target)
+	if err != context.Canceled {
+		t.Fatalf("downloadIMResourceToPath() error = %v, want context.Canceled", err)
+	}
+	// First attempt is made, then retry checks ctx.Err() and returns
+	if attempts != 1 {
+		t.Fatalf("download attempts = %d, want 1", attempts)
+	}
+}
+
+func TestDownloadIMResourceToPathRangeDownload(t *testing.T) {
+	cases := []struct {
+		name       string
+		payloadLen int64
+		wantRanges []string
+	}{
+		{
+			name:       "single small chunk",
+			payloadLen: 16,
+			wantRanges: []string{"bytes=0-131071"},
+		},
+		{
+			name:       "exact probe chunk",
+			payloadLen: probeChunkSize,
+			wantRanges: []string{"bytes=0-131071"},
+		},
+		{
+			name:       "multiple chunks with tail",
+			payloadLen: probeChunkSize + normalChunkSize + 1234,
+			wantRanges: []string{
+				"bytes=0-131071",
+				fmt.Sprintf("bytes=%d-%d", probeChunkSize, probeChunkSize+normalChunkSize-1),
+				fmt.Sprintf("bytes=%d-%d", probeChunkSize+normalChunkSize, probeChunkSize+normalChunkSize+1233),
+			},
+		},
+		{
+			name:       "multiple chunks exact 8mb tail",
+			payloadLen: probeChunkSize + 2*normalChunkSize,
+			wantRanges: []string{
+				"bytes=0-131071",
+				fmt.Sprintf("bytes=%d-%d", probeChunkSize, probeChunkSize+normalChunkSize-1),
+				fmt.Sprintf("bytes=%d-%d", probeChunkSize+normalChunkSize, probeChunkSize+2*normalChunkSize-1),
+			},
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			payload := bytes.Repeat([]byte("range-download-"), int(tt.payloadLen/15)+1)
+			payload = payload[:tt.payloadLen]
+
+			var gotRanges []string
+			runtime := newBotShortcutRuntime(t, shortcutRoundTripFunc(func(req *http.Request) (*http.Response, error) {
+				switch {
+				case strings.Contains(req.URL.Path, "tenant_access_token"):
+					return shortcutJSONResponse(200, map[string]interface{}{
+						"code":                0,
+						"tenant_access_token": "tenant-token",
+						"expire":              7200,
+					}), nil
+				case strings.Contains(req.URL.Path, "/open-apis/im/v1/messages/om_range/resources/file_range"):
+					rangeHeader := req.Header.Get("Range")
+					gotRanges = append(gotRanges, rangeHeader)
+					if req.Header.Get("Authorization") != "Bearer tenant-token" {
+						return nil, fmt.Errorf("missing authorization header")
+					}
+					start, end, err := parseRangeHeader(rangeHeader, int64(len(payload)))
+					if err != nil {
+						return nil, err
+					}
+					return shortcutRawResponse(http.StatusPartialContent, payload[start:end+1], http.Header{
+						"Content-Type":  []string{"application/octet-stream"},
+						"Content-Range": []string{fmt.Sprintf("bytes %d-%d/%d", start, end, len(payload))},
+					}), nil
+				default:
+					return nil, fmt.Errorf("unexpected request: %s", req.URL.String())
+				}
+			}))
+
+			target := filepath.Join(t.TempDir(), "nested", "resource.bin")
+			_, size, err := downloadIMResourceToPath(context.Background(), runtime, "om_range", "file_range", "file", target)
+			if err != nil {
+				t.Fatalf("downloadIMResourceToPath() error = %v", err)
+			}
+			if size != int64(len(payload)) {
+				t.Fatalf("downloadIMResourceToPath() size = %d, want %d", size, len(payload))
+			}
+			if !reflect.DeepEqual(gotRanges, tt.wantRanges) {
+				t.Fatalf("Range requests = %#v, want %#v", gotRanges, tt.wantRanges)
+			}
+
+			got, err := os.ReadFile(target)
+			if err != nil {
+				t.Fatalf("ReadFile() error = %v", err)
+			}
+			if md5.Sum(got) != md5.Sum(payload) {
+				t.Fatalf("downloaded payload MD5 = %x, want %x", md5.Sum(got), md5.Sum(payload))
+			}
+		})
+	}
+}
+
+func TestDownloadIMResourceToPathInvalidContentRange(t *testing.T) {
+	runtime := newBotShortcutRuntime(t, shortcutRoundTripFunc(func(req *http.Request) (*http.Response, error) {
+		switch {
+		case strings.Contains(req.URL.Path, "tenant_access_token"):
+			return shortcutJSONResponse(200, map[string]interface{}{
+				"code":                0,
+				"tenant_access_token": "tenant-token",
+				"expire":              7200,
+			}), nil
+		case strings.Contains(req.URL.Path, "/open-apis/im/v1/messages/om_bad/resources/file_bad"):
+			return shortcutRawResponse(http.StatusPartialContent, []byte("bad"), http.Header{
+				"Content-Type":  []string{"application/octet-stream"},
+				"Content-Range": []string{"bytes 0-2/not-a-number"},
+			}), nil
+		default:
+			return nil, fmt.Errorf("unexpected request: %s", req.URL.String())
+		}
+	}))
+
+	_, _, err := downloadIMResourceToPath(context.Background(), runtime, "om_bad", "file_bad", "file", filepath.Join(t.TempDir(), "out.bin"))
+	if err == nil || !strings.Contains(err.Error(), "invalid Content-Range header") {
+		t.Fatalf("downloadIMResourceToPath() error = %v", err)
+	}
+}
+
+func parseRangeHeader(header string, totalSize int64) (int64, int64, error) {
+	if !strings.HasPrefix(header, "bytes=") {
+		return 0, 0, fmt.Errorf("unexpected range header: %q", header)
+	}
+	parts := strings.SplitN(strings.TrimPrefix(header, "bytes="), "-", 2)
+	if len(parts) != 2 {
+		return 0, 0, fmt.Errorf("unexpected range header: %q", header)
+	}
+
+	start, err := strconv.ParseInt(parts[0], 10, 64)
+	if err != nil {
+		return 0, 0, fmt.Errorf("parse start: %w", err)
+	}
+	end, err := strconv.ParseInt(parts[1], 10, 64)
+	if err != nil {
+		return 0, 0, fmt.Errorf("parse end: %w", err)
+	}
+	if start < 0 || end < start || start >= totalSize {
+		return 0, 0, fmt.Errorf("invalid range bounds: %d-%d for size %d", start, end, totalSize)
+	}
+	if end >= totalSize {
+		end = totalSize - 1
+	}
+	return start, end, nil
+}
+
 func TestUploadImageToIMSuccess(t *testing.T) {
 	var gotBody string
 	runtime := newBotShortcutRuntime(t, shortcutRoundTripFunc(func(req *http.Request) (*http.Response, error) {
diff --git a/shortcuts/im/helpers_test.go b/shortcuts/im/helpers_test.go
index bf9bf870..957e6e19 100644
--- a/shortcuts/im/helpers_test.go
+++ b/shortcuts/im/helpers_test.go
@@ -489,6 +489,43 @@ func TestDownloadIMResourceToPathHTTPClientError(t *testing.T) {
 	}
 }
 
+func TestParseTotalSize(t *testing.T) {
+	tests := []struct {
+		name         string
+		contentRange string
+		want         int64
+		wantErr      string
+	}{
+		{name: "normal", contentRange: "bytes 0-131071/104857600", want: 104857600},
+		{name: "single probe chunk", contentRange: "bytes 0-131071/131072", want: 131072},
+		{name: "single small chunk", contentRange: "bytes 0-15/16", want: 16},
+		{name: "empty", contentRange: "", wantErr: "content-range is empty"},
+		{name: "invalid prefix", contentRange: "items 0-15/16", wantErr: `unsupported content-range: "items 0-15/16"`},
+		{name: "missing total", contentRange: "bytes 0-15/", wantErr: `unsupported content-range: "bytes 0-15/"`},
+		{name: "wildcard", contentRange: "bytes */16", wantErr: `unsupported content-range: "bytes */16"`},
+		{name: "unknown total size", contentRange: "bytes 0-99/*", wantErr: `unknown total size in content-range: "bytes 0-99/*"`},
+		{name: "invalid total", contentRange: "bytes 0-15/not-a-number", wantErr: "parse total size:"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := parseTotalSize(tt.contentRange)
+			if tt.wantErr != "" {
+				if err == nil || !strings.Contains(err.Error(), tt.wantErr) {
+					t.Fatalf("parseTotalSize() error = %v, want substring %q", err, tt.wantErr)
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("parseTotalSize() unexpected error = %v", err)
+			}
+			if got != tt.want {
+				t.Fatalf("parseTotalSize() = %d, want %d", got, tt.want)
+			}
+		})
+	}
+}
+
 func TestShortcuts(t *testing.T) {
 	var commands []string
 	for _, shortcut := range Shortcuts() {
diff --git a/shortcuts/im/im_messages_resources_download.go b/shortcuts/im/im_messages_resources_download.go
index beeaacd8..fdfad2a4 100644
--- a/shortcuts/im/im_messages_resources_download.go
+++ b/shortcuts/im/im_messages_resources_download.go
@@ -10,6 +10,7 @@ import (
 	"net/http"
 	"os"
 	"path/filepath"
+	"strconv"
 	"strings"
 	"time"
 
@@ -106,43 +107,176 @@ func normalizeDownloadOutputPath(fileKey, outputPath string) (string, error) {
 	return outputPath, nil
 }
 
-const defaultIMResourceDownloadTimeout = 120 * time.Second
+const (
+	defaultIMResourceDownloadTimeout = 120 * time.Second
+	probeChunkSize                   = int64(128 * 1024)
+	normalChunkSize                  = int64(8 * 1024 * 1024)
+	downloadWorkers                  = 1
+	imDownloadRequestRetries         = 2
+	imDownloadRetryDelay             = 300 * time.Millisecond
+)
 
 var imMimeToExt = map[string]string{
-	"image/png": ".png",
-	"image/jpeg": ".jpg",
-	"image/gif": ".gif",
-	"image/webp": ".webp",
-	"image/svg+xml": ".svg",
-	"application/pdf": ".pdf",
-	"video/mp4": ".mp4",
-	"video/3gpp": ".3gp",
-	"video/x-msvideo": ".avi",
-	"audio/mpeg": ".mp3",
-	"audio/ogg": ".ogg",
-	"audio/wav": ".wav",
-	"text/plain": ".txt",
-	"text/html": ".html",
-	"text/css": ".css",
-	"text/csv": ".csv",
-	"application/zip": ".zip",
+	"image/png": ".png",
+	"image/jpeg": ".jpg",
+	"image/gif": ".gif",
+	"image/webp": ".webp",
+	"image/svg+xml": ".svg",
+	"application/pdf": ".pdf",
+	"video/mp4": ".mp4",
+	"video/3gpp": ".3gp",
+	"video/x-msvideo": ".avi",
+	"audio/mpeg": ".mp3",
+	"audio/ogg": ".ogg",
+	"audio/wav": ".wav",
+	"text/plain": ".txt",
+	"text/html": ".html",
+	"text/css": ".css",
+	"text/csv": ".csv",
+	"application/zip": ".zip",
 	"application/x-zip-compressed": ".zip",
 	"application/x-rar-compressed": ".rar",
-	"application/json": ".json",
-	"application/xml": ".xml",
-	"application/octet-stream": ".bin",
-	"application/msword": ".doc",
+	"application/json": ".json",
+	"application/xml": ".xml",
+	"application/octet-stream": ".bin",
+	"application/msword": ".doc",
 	"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
-	"application/vnd.ms-excel": ".xls",
-	"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
-	"application/vnd.ms-powerpoint": ".ppt",
+	"application/vnd.ms-excel": ".xls",
+	"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
+	"application/vnd.ms-powerpoint": ".ppt",
 	"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
 }
 
+// chunkTask is an inclusive byte range [start, end] fetched by one Range request.
+type chunkTask struct {
+	start int64
+	end   int64
+}
+
+// downloadIMResourceToPath downloads a message resource into a temp file next to
+// safePath and renames it into place, returning the final path and byte count.
+// It probes with a Range request: a 206 reply is completed chunk by chunk, a 200
+// reply is streamed as-is.
 func downloadIMResourceToPath(ctx context.Context, runtime *common.RuntimeContext, messageID, fileKey, fileType, safePath string) (string, int64, error) {
+	downloadResp, err := doIMResourceDownloadRequest(ctx, runtime, messageID, fileKey, fileType, map[string]string{
+		"Range": fmt.Sprintf("bytes=0-%d", probeChunkSize-1),
+	})
+	if err != nil {
+		return "", 0, err
+	}
+	defer downloadResp.Body.Close()
+
+	if downloadResp.StatusCode >= 400 {
+		return "", 0, downloadResponseError(downloadResp)
+	}
+
+	// Create the parent directory only after the status check (as the previous
+	// implementation did), so HTTP errors leave no empty directories behind.
+	if err := os.MkdirAll(filepath.Dir(safePath), 0700); err != nil {
+		return "", 0, output.Errorf(output.ExitInternal, "api_error", "cannot create parent directory: %s", err)
+	}
+
+	finalPath := resolveIMResourceDownloadPath(safePath, downloadResp.Header.Get("Content-Type"))
+	tmpFile, tmpPath, err := createTempDownloadFile(finalPath, 0600)
+	if err != nil {
+		return "", 0, err
+	}
+
+	success := false
+	defer func() {
+		if success {
+			return
+		}
+		_ = tmpFile.Close()
+		_ = os.Remove(tmpPath)
+	}()
+
+	var sizeBytes int64
+	switch downloadResp.StatusCode {
+	case http.StatusPartialContent:
+		totalSize, err := parseTotalSize(downloadResp.Header.Get("Content-Range"))
+		if err != nil {
+			return "", 0, output.ErrNetwork("invalid Content-Range header: %s", err)
+		}
+		probeWritten, err := writeChunkAt(tmpFile, downloadResp.Body, 0)
+		if err != nil {
+			return "", 0, err
+		}
+		// Later chunks are written at fixed offsets, so a short probe would leave a
+		// hole that the final size check cannot see; reject it here.
+		if want := min(probeChunkSize, totalSize); probeWritten < want {
+			return "", 0, output.ErrNetwork("probe chunk truncated: expected %d bytes, got %d", want, probeWritten)
+		}
+
+		tasks := buildChunkTasks(totalSize)
+		sem := make(chan struct{}, downloadWorkers)
+		for _, task := range tasks {
+			sem <- struct{}{}
+			err := downloadAndWriteChunk(ctx, runtime, messageID, fileKey, fileType, tmpFile, task.start, task.end)
+			<-sem
+			if err != nil {
+				return "", 0, err
+			}
+		}
+
+		stat, err := tmpFile.Stat()
+		if err != nil {
+			return "", 0, output.Errorf(output.ExitInternal, "api_error", "cannot stat file: %s", err)
+		}
+		if stat.Size() != totalSize {
+			return "", 0, output.ErrNetwork("file size mismatch: expected %d, got %d", totalSize, stat.Size())
+		}
+		sizeBytes = totalSize
+
+	case http.StatusOK:
+		sizeBytes, err = io.Copy(tmpFile, downloadResp.Body)
+		if err != nil {
+			return "", 0, output.Errorf(output.ExitInternal, "api_error", "cannot write file: %s", err)
+		}
+
+	default:
+		return "", 0, output.ErrNetwork("unexpected status code: %d", downloadResp.StatusCode)
+	}
+
+	if err := tmpFile.Sync(); err != nil {
+		return "", 0, output.Errorf(output.ExitInternal, "api_error", "cannot sync file: %s", err)
+	}
+	if err := tmpFile.Close(); err != nil {
+		return "", 0, output.Errorf(output.ExitInternal, "api_error", "cannot close file: %s", err)
+	}
+	if err := os.Rename(tmpPath, finalPath); err != nil {
+		return "", 0, output.Errorf(output.ExitInternal, "api_error", "cannot finalize file: %s", err)
+	}
+	success = true
+	return finalPath, sizeBytes, nil
+}
+
+// resolveIMResourceDownloadPath appends an extension looked up from contentType
+// when safePath has none; otherwise it returns safePath unchanged.
+func resolveIMResourceDownloadPath(safePath, contentType string) string {
+	if filepath.Ext(safePath) != "" {
+		return safePath
+	}
+	mimeType := strings.Split(contentType, ";")[0]
+	mimeType = strings.TrimSpace(mimeType)
+	if ext, ok := imMimeToExt[mimeType]; ok {
+		return safePath + ext
+	}
+	return safePath
+}
+
+// doIMResourceDownloadRequest issues the resource GET with the given extra headers,
+// retrying up to imDownloadRequestRetries times when DoAPIStream returns an error.
+func doIMResourceDownloadRequest(ctx context.Context, runtime *common.RuntimeContext, messageID, fileKey, fileType string, headers map[string]string) (*http.Response, error) {
 	query := larkcore.QueryParams{}
 	query.Set("type", fileType)
-	downloadResp, err := runtime.DoAPIStream(ctx, &larkcore.ApiReq{
+
+	headerValues := make(http.Header, len(headers))
+	for key, value := range headers {
+		headerValues.Set(key, value)
+	}
+
+	req := &larkcore.ApiReq{
 		HttpMethod: http.MethodGet,
 		ApiPath:    "/open-apis/im/v1/messages/:message_id/resources/:file_key",
 		PathParams: larkcore.PathParams{
@@ -150,38 +284,139 @@ func downloadIMResourceToPath(ctx context.Context, runtime *common.RuntimeContex
 			"file_key":   fileKey,
 		},
 		QueryParams: query,
-	}, defaultIMResourceDownloadTimeout)
-	if err != nil {
-		return "", 0, err
 	}
-	defer downloadResp.Body.Close()
 
-	if downloadResp.StatusCode >= 400 {
-		body, _ := io.ReadAll(io.LimitReader(downloadResp.Body, 4096))
-		if len(body) > 0 {
-			return "", 0, output.ErrNetwork("download failed: HTTP %d: %s", downloadResp.StatusCode, strings.TrimSpace(string(body)))
+	for attempt := 0; attempt <= imDownloadRequestRetries; attempt++ {
+		resp, err := runtime.DoAPIStream(ctx, req, defaultIMResourceDownloadTimeout, larkcore.WithHeaders(headerValues))
+		if err == nil {
+			return resp, nil
+		}
+		if ctx.Err() != nil {
+			return nil, ctx.Err()
 		}
-		return "", 0, output.ErrNetwork("download failed: HTTP %d", downloadResp.StatusCode)
+		if attempt == imDownloadRequestRetries {
+			return nil, err
+		}
+		sleepIMDownloadRetry(ctx, attempt)
 	}
 
-	if err := os.MkdirAll(filepath.Dir(safePath), 0700); err != nil {
-		return "", 0, output.Errorf(output.ExitInternal, "api_error", "cannot create parent directory: %s", err)
+	return nil, output.ErrNetwork("download request failed after retries")
+}
+
+// sleepIMDownloadRetry waits imDownloadRetryDelay doubled per attempt, returning
+// early if ctx is done.
+func sleepIMDownloadRetry(ctx context.Context, attempt int) {
+	delay := imDownloadRetryDelay * (1 << uint(attempt))
+	timer := time.NewTimer(delay)
+	defer timer.Stop()
+	select {
+	case <-ctx.Done():
+	case <-timer.C:
 	}
+}
 
-	// Auto-detect extension from Content-Type if missing
-	finalPath := safePath
-	if filepath.Ext(safePath) == "" {
-		contentType := downloadResp.Header.Get("Content-Type")
-		mimeType := strings.Split(contentType, ";")[0]
-		mimeType = strings.TrimSpace(mimeType)
-		if ext, ok := imMimeToExt[mimeType]; ok {
-			finalPath = safePath + ext
-		}
+// downloadResponseError builds a network error from the status code and up to 4 KiB of the body.
+func downloadResponseError(resp *http.Response) error {
+	body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
+	if len(body) > 0 {
+		return output.ErrNetwork("download failed: HTTP %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
 	}
+	return output.ErrNetwork("download failed: HTTP %d", resp.StatusCode)
+}
 
-	sizeBytes, err := validate.AtomicWriteFromReader(finalPath, downloadResp.Body, 0600)
+// createTempDownloadFile creates a dot-prefixed temp file beside finalPath with mode
+// perm and returns it together with its path.
+func createTempDownloadFile(finalPath string, perm os.FileMode) (*os.File, string, error) {
+	tmpFile, err := os.CreateTemp(filepath.Dir(finalPath), "."+filepath.Base(finalPath)+".*.tmp")
 	if err != nil {
-		return "", 0, output.Errorf(output.ExitInternal, "api_error", "cannot create file: %s", err)
+		return nil, "", output.Errorf(output.ExitInternal, "api_error", "cannot create file: %s", err)
 	}
-	return finalPath, sizeBytes, nil
+	if err := tmpFile.Chmod(perm); err != nil {
+		_ = tmpFile.Close()
+		_ = os.Remove(tmpFile.Name())
+		return nil, "", output.Errorf(output.ExitInternal, "api_error", "cannot set file mode: %s", err)
+	}
+	return tmpFile, tmpFile.Name(), nil
+}
+
+// buildChunkTasks splits [probeChunkSize, totalSize) into inclusive ranges of at most normalChunkSize bytes.
+func buildChunkTasks(totalSize int64) []chunkTask {
+	var tasks []chunkTask
+	for offset := probeChunkSize; offset < totalSize; offset += normalChunkSize {
+		tasks = append(tasks, chunkTask{
+			start: offset,
+			end:   min(offset+normalChunkSize-1, totalSize-1),
+		})
+	}
+	return tasks
+}
+
+// parseTotalSize extracts the total length from a "bytes start-end/total"
+// Content-Range value; an unknown ("*") or malformed total is an error.
+func parseTotalSize(contentRange string) (int64, error) {
+	contentRange = strings.TrimSpace(contentRange)
+	if contentRange == "" {
+		return 0, fmt.Errorf("content-range is empty")
+	}
+	if !strings.HasPrefix(contentRange, "bytes ") {
+		return 0, fmt.Errorf("unsupported content-range: %q", contentRange)
+	}
+
+	parts := strings.SplitN(strings.TrimPrefix(contentRange, "bytes "), "/", 2)
+	if len(parts) != 2 || parts[1] == "" {
+		return 0, fmt.Errorf("unsupported content-range: %q", contentRange)
+	}
+	if parts[0] == "*" {
+		return 0, fmt.Errorf("unsupported content-range: %q", contentRange)
+	}
+	if parts[1] == "*" {
+		return 0, fmt.Errorf("unknown total size in content-range: %q", contentRange)
+	}
+
+	totalSize, err := strconv.ParseInt(parts[1], 10, 64)
+	if err != nil {
+		return 0, fmt.Errorf("parse total size: %w", err)
+	}
+	if totalSize < 0 {
+		return 0, fmt.Errorf("invalid total size: %d", totalSize)
+	}
+	return totalSize, nil
+}
+
+// writeChunkAt copies body into file starting at offset and returns the bytes written.
+func writeChunkAt(file *os.File, body io.Reader, offset int64) (int64, error) {
+	n, err := io.Copy(io.NewOffsetWriter(file, offset), body)
+	if err != nil {
+		return n, output.Errorf(output.ExitInternal, "api_error", "cannot write file: %s", err)
+	}
+	return n, nil
+}
+
+// downloadAndWriteChunk fetches bytes start..end (inclusive) and writes them at
+// offset start, failing unless the reply is a 206 of exactly that length.
+func downloadAndWriteChunk(ctx context.Context, runtime *common.RuntimeContext, messageID, fileKey, fileType string, file *os.File, start, end int64) error {
+	downloadResp, err := doIMResourceDownloadRequest(ctx, runtime, messageID, fileKey, fileType, map[string]string{
+		"Range": fmt.Sprintf("bytes=%d-%d", start, end),
+	})
+	if err != nil {
+		return err
+	}
+	defer downloadResp.Body.Close()
+
+	if downloadResp.StatusCode >= 400 {
+		return downloadResponseError(downloadResp)
+	}
+	if downloadResp.StatusCode != http.StatusPartialContent {
+		return output.ErrNetwork("unexpected status code: %d", downloadResp.StatusCode)
+	}
+
+	written, err := writeChunkAt(file, downloadResp.Body, start)
+	if err != nil {
+		return err
+	}
+	expected := end - start + 1
+	if written != expected {
+		return output.ErrNetwork("chunk size mismatch: expected %d, got %d", expected, written)
+	}
+	return nil
 }
diff --git a/skills/lark-im/SKILL.md b/skills/lark-im/SKILL.md
index d5b182ef..ea78206d 100644
--- a/skills/lark-im/SKILL.md
+++ b/skills/lark-im/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: lark-im
 version: 1.0.0
-description: "飞书即时通讯:收发消息和管理群聊。发送和回复消息、搜索聊天记录、管理群聊成员、上传下载图片和文件、管理表情回复。当用户需要发消息、查看或搜索聊天记录、下载聊天中的文件、查看群成员时使用。"
+description: "飞书即时通讯:收发消息和管理群聊。发送和回复消息、搜索聊天记录、管理群聊成员、上传下载图片和文件(支持大文件分片下载)、管理表情回复。当用户需要发消息、查看或搜索聊天记录、下载聊天中的文件、查看群成员时使用。"
 metadata:
   requires:
     bins: ["lark-cli"]
@@ -62,7 +62,7 @@ Shortcut 是对常用操作的高级封装(`lark-cli im + [flags]`)。
 | [`+chat-update`](references/lark-im-chat-update.md) | Update group chat name or description; user/bot; updates a chat's name or description |
 | [`+messages-mget`](references/lark-im-messages-mget.md) | Batch get messages by IDs; user/bot; fetches up to 50 om_ message IDs, formats sender names, expands thread replies |
 | [`+messages-reply`](references/lark-im-messages-reply.md) | Reply to a message (supports thread replies); user/bot; supports text/markdown/post/media replies, reply-in-thread, idempotency key |
-| [`+messages-resources-download`](references/lark-im-messages-resources-download.md) | Download images/files from a message; user/bot; downloads image/file resources by message-id and file-key to a safe relative output path |
+| [`+messages-resources-download`](references/lark-im-messages-resources-download.md) | Download images/files from a message; user/bot; supports automatic chunked download for large files (8MB chunks), auto-detects file extension from Content-Type |
 | [`+messages-search`](references/lark-im-messages-search.md) | Search messages across chats (supports keyword, sender, time range filters) with user identity; user-only; filters by chat/sender/attachment/time, supports auto-pagination via `--page-all` / `--page-limit`, enriches results via batched mget and chats batch_query |
 | [`+messages-send`](references/lark-im-messages-send.md) | Send a message to a chat or direct message; user/bot; sends to chat-id or user-id with text/markdown/post/media, supports idempotency key |
 | [`+threads-messages-list`](references/lark-im-threads-messages-list.md) | List messages in a thread; user/bot; accepts om_/omt_ input, resolves message IDs to thread_id, supports sort/pagination |
diff --git a/skills/lark-im/references/lark-im-messages-resources-download.md b/skills/lark-im/references/lark-im-messages-resources-download.md
index bf512879..2f04786f 100644
--- a/skills/lark-im/references/lark-im-messages-resources-download.md
+++ b/skills/lark-im/references/lark-im-messages-resources-download.md
@@ -2,7 +2,7 @@
 
 > **Prerequisite:** Read [`../lark-shared/SKILL.md`](../../lark-shared/SKILL.md) first to understand authentication, global parameters, and safety rules.
 
-Download image or file resources from a message. Resources are identified by the combination of `message_id` + `file_key`, both of which come directly from message content returned by `im +chat-messages-list`.
+Download image or file resources from a message. Supports **automatic chunked download for large files** using HTTP Range requests. Resources are identified by the combination of `message_id` + `file_key`, both of which come directly from message content returned by `im +chat-messages-list`.
 
 > **Note:** read-only message commands render resource keys in message content, but they do not download binaries automatically. Use this command whenever you need to fetch the actual image/file bytes or save them to a specific path.
 
@@ -34,10 +34,26 @@ lark-cli im +messages-resources-download --message-id om_xxx --file-key img_v3_x
 | `--message-id ` | Yes | Message ID (`om_xxx` format) |
 | `--file-key ` | Yes | Resource key (`img_xxx` or `file_xxx`) |
 | `--type ` | Yes | Resource type: `image` or `file` |
-| `--output ` | No | Output path (relative paths only; `..` traversal is not allowed; defaults to `file_key` as the file name) |
+| `--output ` | No | Output path (relative paths only; `..` traversal is not allowed; defaults to `file_key` as the file name). If the path has no file extension, one is added automatically based on the response Content-Type |
 | `--as ` | No | Identity type: `user` (default) or `bot` |
 | `--dry-run` | No | Print the request only, do not execute it |
 
+## Large File Download (Auto Chunking)
+
+When downloading large files, the command automatically uses **HTTP Range requests** for reliable chunked downloading:
+
+| Behavior | Details |
+|----------|---------|
+| Probe chunk | First 128 KB to detect file size and Content-Type |
+| Chunk size | 8 MB per subsequent request |
+| Workers | Single-threaded sequential download (ensures reliability) |
+| Retries | Up to 2 retries for transient request failures, with exponential backoff |
+
+**Benefits:**
+- Reduces the impact of transient request failures during large downloads
+- Automatically detects and appends the correct file extension from Content-Type
+- Validates file size integrity after download completion
+
 ## `file_key` Sources
 
 Different resource markers in message content correspond to different `file_key` and `type` values:
@@ -70,6 +86,8 @@ lark-cli im +messages-resources-download --message-id om_xxx --file-key img_v3_x
 | Hit error code 234002 or 14005 | No permission, **not** missing API scope | no access to this chat or file was deleted — do not retry, return the error to the user |
 | Permission denied | `im:message:readonly` is not authorized | Run `auth login --scope "im:message:readonly"` |
 | File too large | Over the 100 MB limit | This is a Feishu API limitation and cannot be bypassed with this endpoint |
+| File size mismatch | Chunked download integrity check failed | Network instability during download; retry the command |
+| Content-Range error | Server returned an invalid `Content-Range` header | Transient API issue; retry the command |
 
 ## References