From 91b851b570c834552ef30589d0f738651f1d1011 Mon Sep 17 00:00:00 2001 From: Steve Van Hooser Date: Sat, 18 Apr 2026 10:13:24 -0400 Subject: [PATCH 1/6] Use curl instead of websave for bulk document download Gateway-level HTTP compression can corrupt binary/zip downloads when MATLAB's websave auto-decompresses content-encoded responses, causing "not a tar file" / invalid archive errors on Linux (Mac is unaffected). Route the download through ndi.cloud.api.files.getFile with useCurl=true, matching the pattern already used by didsqlite.do_openbinarydoc. --- .../+download/downloadDocumentCollection.m | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/ndi/+ndi/+cloud/+download/downloadDocumentCollection.m b/src/ndi/+ndi/+cloud/+download/downloadDocumentCollection.m index a5cbf4501..4d6456cbe 100644 --- a/src/ndi/+ndi/+cloud/+download/downloadDocumentCollection.m +++ b/src/ndi/+ndi/+cloud/+download/downloadDocumentCollection.m @@ -24,7 +24,7 @@ % directly to avoid an extra API call to fetch the list again. % % options.Timeout - (1,1) double, optional -% The timeout in seconds for the websave download operation. +% The timeout in seconds for the download operation. % Default is 20. % % options.ChunkSize - (1,1) double, optional @@ -107,12 +107,20 @@ isFinished = false; t1 = tic; + lastErr = ''; % The download URL may not be immediately ready. Retry until timeout. + % Use curl (not websave) so the response body is written as-is; HTTP + % content-encoding applied at the gateway otherwise corrupts the zip. while ~isFinished && toc(t1) < options.Timeout try - websave(tempZipFilepath, downloadUrl); + [success_d, answer_d] = ndi.cloud.api.files.getFile(downloadUrl, tempZipFilepath, 'useCurl', true); + if ~success_d + lastErr = char(string(answer_d)); + error('NDI:Cloud:DocumentDownloadFailed', 'curl download failed: %s', lastErr); + end isFinished = true; catch ME + lastErr = ME.message; pause(1) % Wait a second before retrying end end @@ -120,7 +128,7 @@ if ~isFinished error('NDI:Cloud:DocumentDownloadFailed', ... ['Download failed for chunk %d with message:\n %s\n. If this persists, ', ... - 'consider increasing the Timeout value.'], c, ME.message); + 'consider increasing the Timeout value.'], c, lastErr); end % Unzip and process documents from the current chunk @@ -156,4 +164,4 @@ function deleteIfExists(filePath) if isfile(filePath) delete(filePath) end -end \ No newline at end of file +end From 57aa9d7dadc0990a2f67bf75373acf966e43a30e Mon Sep 17 00:00:00 2001 From: Steve Van Hooser Date: Sat, 18 Apr 2026 10:13:50 -0400 Subject: [PATCH 2/6] Use curl instead of websave in downloadDatasetFiles Gateway HTTP compression corrupts binary downloads when websave auto-decompresses the response. Route through getFile with useCurl=true. --- src/ndi/+ndi/+cloud/+download/downloadDatasetFiles.m | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/ndi/+ndi/+cloud/+download/downloadDatasetFiles.m b/src/ndi/+ndi/+cloud/+download/downloadDatasetFiles.m index 0dfde4fc1..9f6b28ab0 100644 --- a/src/ndi/+ndi/+cloud/+download/downloadDatasetFiles.m +++ b/src/ndi/+ndi/+cloud/+download/downloadDatasetFiles.m @@ -76,9 +76,14 @@ function downloadDatasetFiles(cloudDatasetId, targetFolder, fileUuids, options) end downloadURL = answer.downloadUrl; - % Save the file + % Save the file using curl so gateway-level HTTP compression + % does not corrupt the saved bytes (websave auto-decompresses). try - websave(targetFilepath, downloadURL); + [success_d, answer_d] = ndi.cloud.api.files.getFile(downloadURL, targetFilepath, 'useCurl', true); + if ~success_d + error('NDI:Cloud:FileDownloadFailed', ... + 'curl download failed: %s', char(string(answer_d))); + end catch ME if options.AbortOnError rethrow(ME) From a8a4e5379332d1a6780f678ca10104d2abf3e9ba Mon Sep 17 00:00:00 2001 From: Steve Van Hooser Date: Sat, 18 Apr 2026 10:14:32 -0400 Subject: [PATCH 3/6] Use curl instead of websave in downloadGenericFiles Gateway HTTP compression corrupts binary downloads when websave auto-decompresses the response. Route through getFile with useCurl=true. --- src/ndi/+ndi/+cloud/+download/downloadGenericFiles.m | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/ndi/+ndi/+cloud/+download/downloadGenericFiles.m b/src/ndi/+ndi/+cloud/+download/downloadGenericFiles.m index 46e2fa374..21f494e5a 100644 --- a/src/ndi/+ndi/+cloud/+download/downloadGenericFiles.m +++ b/src/ndi/+ndi/+cloud/+download/downloadGenericFiles.m @@ -167,8 +167,14 @@ continue; end + % Download using curl so gateway-level HTTP compression does not + % corrupt the saved bytes (websave auto-decompresses responses). try - websave(targetPath, answer.downloadUrl); + [success_d, answer_d] = ndi.cloud.api.files.getFile(answer.downloadUrl, targetPath, 'useCurl', true); + if ~success_d + error('NDI:downloadGenericFiles:DownloadError', ... + 'curl download failed: %s', char(string(answer_d))); + end downloadedFiles(end+1) = filename; %#ok catch ME warning('NDI:downloadGenericFiles:DownloadError', ... From 2c9ddb1c1fbd19454981206f02a1fe5c23c2e4c5 Mon Sep 17 00:00:00 2001 From: Steve Van Hooser Date: Sat, 18 Apr 2026 10:25:20 -0400 Subject: [PATCH 4/6] Pass --compressed to curl so gateway gzip responses decode The API gateway serves binary artifacts (.zip for bulk document downloads, .nbf.tgz for epoch binaries) with Content-Encoding: gzip. websave was transparently decoding these on Mac, which masked the issue there; plain `curl -L -o` does not decode and writes the compressed bytes to disk, producing "invalid ZIP file" / "not a tar file" errors. Adding --compressed makes curl advertise Accept-Encoding and decompress the response, matching websave's prior behavior. --- .../+ndi/+cloud/+api/+implementation/+files/GetFile.m | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/ndi/+ndi/+cloud/+api/+implementation/+files/GetFile.m b/src/ndi/+ndi/+cloud/+api/+implementation/+files/GetFile.m index bb40730fc..c3e0eeff4 100644 --- a/src/ndi/+ndi/+cloud/+api/+implementation/+files/GetFile.m +++ b/src/ndi/+ndi/+cloud/+api/+implementation/+files/GetFile.m @@ -58,7 +58,12 @@ b = false; apiURL = this.downloadURL; % Return the URL as a string - command = sprintf('curl -L -o "%s" "%s"', this.downloadedFile, this.downloadURL); + % --compressed advertises Accept-Encoding: gzip,deflate and + % transparently decompresses the response. The gateway serves + % binary artifacts (.zip, .nbf.tgz) with Content-Encoding set, + % so without this flag curl writes the raw compressed body and + % downstream unzip/untar fails with "invalid" archive errors. + command = sprintf('curl --compressed -L -o "%s" "%s"', this.downloadedFile, this.downloadURL); [status, result] = system(command); @@ -69,4 +74,4 @@ apiResponse = struct('StatusCode', 'N/A (cURL)', 'StatusLine', "Exit Status: " + status); end end -end \ No newline at end of file +end From a752e9ebc22f97af9185b7a4365b9c546ccf06fa Mon Sep 17 00:00:00 2001 From: Steve Van Hooser Date: Sat, 18 Apr 2026 10:28:14 -0400 Subject: [PATCH 5/6] Request Accept-Encoding: identity; fail curl on HTTP errors Our cloud payloads are already compressed archives (.zip, .nbf.tgz). Layering HTTP-level gzip on top has produced corrupt archives on both Mac (stream unzip fails) and Linux (untar fails) even with --compressed, likely because streaming decoders misbehave on already-compressed content. Explicitly request identity encoding so the gateway delivers the file bytes as-is. Add -f so curl exits non-zero on HTTP errors instead of writing an error body to the destination file (which would then masquerade as a corrupt archive). --- .../+cloud/+api/+implementation/+files/GetFile.m | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/ndi/+ndi/+cloud/+api/+implementation/+files/GetFile.m b/src/ndi/+ndi/+cloud/+api/+implementation/+files/GetFile.m index c3e0eeff4..5c6a9b511 100644 --- a/src/ndi/+ndi/+cloud/+api/+implementation/+files/GetFile.m +++ b/src/ndi/+ndi/+cloud/+api/+implementation/+files/GetFile.m @@ -58,12 +58,15 @@ b = false; apiURL = this.downloadURL; % Return the URL as a string - % --compressed advertises Accept-Encoding: gzip,deflate and - % transparently decompresses the response. The gateway serves - % binary artifacts (.zip, .nbf.tgz) with Content-Encoding set, - % so without this flag curl writes the raw compressed body and - % downstream unzip/untar fails with "invalid" archive errors. - command = sprintf('curl --compressed -L -o "%s" "%s"', this.downloadedFile, this.downloadURL); + % Our payloads are already compressed archives (.zip, .nbf.tgz). + % Asking the gateway to apply HTTP compression on top of that + % buys nothing and has produced corrupt archives on both Mac + % and Linux (stream decoders fail on already-compressed bytes). + % Request identity encoding so the raw file is delivered as-is. + % Use -f so HTTP errors surface as non-zero exit codes instead + % of writing a server error body into the destination file. + command = sprintf('curl -fsSL -H "Accept-Encoding: identity" -o "%s" "%s"', ... + this.downloadedFile, this.downloadURL); [status, result] = system(command); From 9f0f1aa02c37f1f26eff689df2b113e0ac8fa953 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 18 Apr 2026 15:02:38 +0000 Subject: [PATCH 6/6] Update GitHub badges --- .github/badges/tests.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/badges/tests.svg b/.github/badges/tests.svg index c4fb836a1..78f1fc3ea 100644 --- a/.github/badges/tests.svg +++ b/.github/badges/tests.svg @@ -1 +1 @@ -teststests638/643 passed638/643 passed +teststests643 passed643 passed \ No newline at end of file