From af496acf809c020b810e7e884e8bdb875beefc24 Mon Sep 17 00:00:00 2001 From: pulimsr Date: Fri, 28 Nov 2025 09:38:43 -0800 Subject: [PATCH 1/2] Add full object checksum validation for Transfer Manager downloads --- .../include/aws/transfer/TransferManager.h | 7 ++ .../source/transfer/TransferManager.cpp | 97 +++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferManager.h b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferManager.h index a4b5580fd6e..f8506efe0ee 100644 --- a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferManager.h +++ b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferManager.h @@ -338,6 +338,13 @@ namespace Aws */ void SetChecksumForAlgorithm(const std::shared_ptr& state, Aws::S3::Model::CompletedPart& part); + /** + * Validates downloaded file checksum against expected checksum from HeadObject. + * @param handle The transfer handle containing checksum and file path. + * @return True if validation passes or no checksum to validate, false if validation fails. + */ + bool ValidateDownloadChecksum(const std::shared_ptr& handle); + static Aws::String DetermineFilePath(const Aws::String& directory, const Aws::String& prefix, const Aws::String& keyName); Aws::Utils::ExclusiveOwnershipResourceManager m_bufferManager; diff --git a/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp b/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp index 996e427e114..f0191f180ed 100644 --- a/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp +++ b/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp @@ -938,6 +938,21 @@ namespace Aws handle->SetContentType(getObjectOutcome.GetResult().GetContentType()); handle->ChangePartToCompleted(partState, getObjectOutcome.GetResult().GetETag()); getObjectOutcome.GetResult().GetBody().flush(); + + if (!ValidateDownloadChecksum(handle)) { + Aws::Client::AWSError checksumError( + Aws::S3::S3Errors::INTERNAL_FAILURE, + "ChecksumMismatch", + "Downloaded file checksum does not match expected checksum", + false); + handle->ChangePartToFailed(partState); + handle->UpdateStatus(TransferStatus::FAILED); + handle->SetError(checksumError); + TriggerErrorCallback(handle, checksumError); + TriggerTransferStatusUpdatedCallback(handle); + return; + } + handle->UpdateStatus(TransferStatus::COMPLETED); } else @@ -1000,6 +1015,21 @@ namespace Aws handle->SetContentType(headObjectOutcome.GetResult().GetContentType()); handle->SetMetadata(headObjectOutcome.GetResult().GetMetadata()); handle->SetEtag(headObjectOutcome.GetResult().GetETag()); + + if (headObjectOutcome.GetResult().GetChecksumType() == S3::Model::ChecksumType::FULL_OBJECT) { + if (!headObjectOutcome.GetResult().GetChecksumCRC32().empty()) { + handle->SetChecksum(headObjectOutcome.GetResult().GetChecksumCRC32()); + } else if (!headObjectOutcome.GetResult().GetChecksumCRC32C().empty()) { + handle->SetChecksum(headObjectOutcome.GetResult().GetChecksumCRC32C()); + } else if (!headObjectOutcome.GetResult().GetChecksumSHA256().empty()) { + handle->SetChecksum(headObjectOutcome.GetResult().GetChecksumSHA256()); + } else if (!headObjectOutcome.GetResult().GetChecksumSHA1().empty()) { + handle->SetChecksum(headObjectOutcome.GetResult().GetChecksumSHA1()); + } else if (!headObjectOutcome.GetResult().GetChecksumCRC64NVME().empty()) { + handle->SetChecksum(headObjectOutcome.GetResult().GetChecksumCRC64NVME()); + } + } + /* When bucket versioning is suspended, head object will return "null" for unversioned object. * Send following GetObject with "null" as versionId will result in 403 access denied error if your IAM role or policy * doesn't have GetObjectVersion permission. @@ -1240,6 +1270,22 @@ namespace Aws if (failedParts.size() == 0 && handle->GetBytesTransferred() == handle->GetBytesTotalSize()) { outcome.GetResult().GetBody().flush(); + + // Validate checksum if available + if (!ValidateDownloadChecksum(handle)) { + Aws::Client::AWSError checksumError( + Aws::S3::S3Errors::INTERNAL_FAILURE, + "ChecksumMismatch", + "Downloaded file checksum does not match expected checksum", + false); + handle->UpdateStatus(TransferStatus::FAILED); + handle->SetError(checksumError); + TriggerErrorCallback(handle, checksumError); + TriggerTransferStatusUpdatedCallback(handle); + partState->SetDownloadPartStream(nullptr); + return; + } + handle->UpdateStatus(TransferStatus::COMPLETED); } else @@ -1594,5 +1640,56 @@ namespace Aws partFunc->second(part, state->GetChecksum()); } } + + bool TransferManager::ValidateDownloadChecksum(const std::shared_ptr& handle) { + if (handle->GetChecksum().empty() || handle->GetTargetFilePath().empty()) { + return true; + } + + std::shared_ptr hashCalculator; + if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::CRC32) { + hashCalculator = Aws::MakeShared("TransferManager"); + } else if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::CRC32C) { + hashCalculator = Aws::MakeShared("TransferManager"); + } else if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::SHA1) { + hashCalculator = Aws::MakeShared("TransferManager"); + } else if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::SHA256) { + hashCalculator = Aws::MakeShared("TransferManager"); + } else { + hashCalculator = Aws::MakeShared("TransferManager"); + } + + auto fileStream = Aws::MakeShared("TransferManager", + handle->GetTargetFilePath().c_str(), + std::ios_base::in | std::ios_base::binary); + + if (!fileStream->good()) { + AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId() + << "] Failed to open downloaded file for checksum validation: " << handle->GetTargetFilePath()); + return false; + } + + unsigned char buffer[8192]; + while (fileStream->read(reinterpret_cast(buffer), sizeof(buffer)) || fileStream->gcount() > 0) { + hashCalculator->Update(buffer, static_cast(fileStream->gcount())); + } + + auto hashResult = hashCalculator->GetHash(); + if (!hashResult.IsSuccess()) { + AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId() + << "] Failed to calculate checksum for downloaded file"); + return false; + } + + Aws::String calculatedChecksum = Aws::Utils::HashingUtils::Base64Encode(hashResult.GetResult()); + if (calculatedChecksum != handle->GetChecksum()) { + AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId() + << "] Checksum validation failed. Expected: " << handle->GetChecksum() + << ", Calculated: " << calculatedChecksum); + return false; + } + + return true; + } } } \ No newline at end of file From 1aac67b3fc9f57df8cea52d15c0415028286fcaf Mon Sep 17 00:00:00 2001 From: pulimsr Date: Tue, 2 Dec 2025 14:51:33 -0500 Subject: [PATCH 2/2] Download to temporary .s3tmp files with atomic rename on validation success --- .../include/aws/transfer/TransferHandle.h | 12 ++ .../include/aws/transfer/TransferManager.h | 4 +- .../source/transfer/TransferManager.cpp | 112 +++++++++++++----- 3 files changed, 95 insertions(+), 33 deletions(-) diff --git a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h index 0d062be1e00..3bdb1bfdec2 100644 --- a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h +++ b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h @@ -291,6 +291,12 @@ namespace Aws */ inline const Aws::String& GetTargetFilePath() const { return m_fileName; } + /** + * (Download only) Temporary file path used during download before atomic rename to target. + */ + inline const Aws::String& GetTempFilePath() const { return m_tempFilePath; } + inline void SetTempFilePath(const Aws::String& tempPath) { m_tempFilePath = tempPath; } + /** * (Download only) version id of the object to retrieve; if not specified in constructor, then latest is used */ @@ -366,6 +372,11 @@ namespace Aws void WaitUntilFinished() const; const CreateDownloadStreamCallback& GetCreateDownloadStreamFunction() const { return m_createDownloadStreamFn; } + void SetCreateDownloadStreamFunction(const CreateDownloadStreamCallback& fn) { + std::lock_guard lock(m_downloadStreamLock); + m_createDownloadStreamFn = fn; + m_downloadStream = nullptr; + } /** * Write @partStream to the configured output (f)stream. @@ -409,6 +420,7 @@ namespace Aws Aws::String m_bucket; Aws::String m_key; Aws::String m_fileName; + Aws::String m_tempFilePath; Aws::String m_contentType; Aws::String m_versionId; Aws::String m_etag; diff --git a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferManager.h b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferManager.h index f8506efe0ee..82ea277a029 100644 --- a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferManager.h +++ b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferManager.h @@ -341,9 +341,9 @@ namespace Aws /** * Validates downloaded file checksum against expected checksum from HeadObject. * @param handle The transfer handle containing checksum and file path. - * @return True if validation passes or no checksum to validate, false if validation fails. + * @return Success outcome if validation passes or no checksum to validate, error outcome if validation fails. */ - bool ValidateDownloadChecksum(const std::shared_ptr& handle); + Aws::Utils::Outcome> ValidateDownloadChecksum(const std::shared_ptr& handle); static Aws::String DetermineFilePath(const Aws::String& directory, const Aws::String& prefix, const Aws::String& keyName); diff --git a/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp b/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp index f0191f180ed..38faaf81b7a 100644 --- a/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp +++ b/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp @@ -36,6 +36,21 @@ namespace Aws return (path.find_last_of('/') == path.size() - 1 || path.find_last_of('\\') == path.size() - 1); } + static Aws::String GenerateTempFilePath(const Aws::String& targetPath) + { + static const char* HEX_CHARS = "0123456789abcdef"; + char uniqueId[9] = {0}; + for (int i = 0; i < 8; ++i) { + uniqueId[i] = HEX_CHARS[rand() % 16]; + } + + size_t lastSlash = targetPath.find_last_of("/\\"); + if (lastSlash != Aws::String::npos) { + return targetPath.substr(0, lastSlash + 1) + ".s3tmp." + uniqueId; + } + return ".s3tmp." + Aws::String(uniqueId); + } + template static void SetChecksumOnRequest(RequestT& request, S3::Model::ChecksumAlgorithm checksumAlgorithm, const Aws::String& checksum) { if (checksumAlgorithm == S3::Model::ChecksumAlgorithm::CRC64NVME) { @@ -231,15 +246,19 @@ namespace Aws const DownloadConfiguration& downloadConfig, const std::shared_ptr& context) { + Aws::String tempFilePath = GenerateTempFilePath(writeToFile); + #ifdef _MSC_VER - auto createFileFn = [=]() { return Aws::New(CLASS_TAG, Aws::Utils::StringUtils::ToWString(writeToFile.c_str()).c_str(), + auto createFileFn = [=]() { return Aws::New(CLASS_TAG, Aws::Utils::StringUtils::ToWString(tempFilePath.c_str()).c_str(), std::ios_base::out | std::ios_base::in | std::ios_base::binary | std::ios_base::trunc);}; #else - auto createFileFn = [=]() { return Aws::New(CLASS_TAG, writeToFile.c_str(), + auto createFileFn = [=]() { return Aws::New(CLASS_TAG, tempFilePath.c_str(), std::ios_base::out | std::ios_base::in | std::ios_base::binary | std::ios_base::trunc);}; #endif - return DownloadFile(bucketName, keyName, createFileFn, downloadConfig, writeToFile, context); + auto handle = DownloadFile(bucketName, keyName, createFileFn, downloadConfig, writeToFile, context); + handle->SetTempFilePath(tempFilePath); + return handle; } std::shared_ptr TransferManager::RetryUpload(const Aws::String& fileName, const std::shared_ptr& retryHandle) @@ -939,16 +958,12 @@ namespace Aws handle->ChangePartToCompleted(partState, getObjectOutcome.GetResult().GetETag()); getObjectOutcome.GetResult().GetBody().flush(); - if (!ValidateDownloadChecksum(handle)) { - Aws::Client::AWSError checksumError( - Aws::S3::S3Errors::INTERNAL_FAILURE, - "ChecksumMismatch", - "Downloaded file checksum does not match expected checksum", - false); + auto checksumOutcome = ValidateDownloadChecksum(handle); + if (!checksumOutcome.IsSuccess()) { handle->ChangePartToFailed(partState); handle->UpdateStatus(TransferStatus::FAILED); - handle->SetError(checksumError); - TriggerErrorCallback(handle, checksumError); + handle->SetError(checksumOutcome.GetError()); + TriggerErrorCallback(handle, checksumOutcome.GetError()); TriggerTransferStatusUpdatedCallback(handle); return; } @@ -961,7 +976,12 @@ namespace Aws << "] Failed to download object to Bucket: [" << handle->GetBucketName() << "] with Key: [" << handle->GetKey() << "] " << getObjectOutcome.GetError()); handle->ChangePartToFailed(partState); - handle->UpdateStatus(DetermineIfFailedOrCanceled(*handle)); + auto finalStatus = DetermineIfFailedOrCanceled(*handle); + handle->UpdateStatus(finalStatus); + + if (finalStatus == TransferStatus::FAILED && !handle->GetTempFilePath().empty()) { + Aws::FileSystem::RemoveFileIfExists(handle->GetTempFilePath().c_str()); + } handle->SetError(getObjectOutcome.GetError()); TriggerErrorCallback(handle, getObjectOutcome.GetError()); @@ -1271,16 +1291,11 @@ namespace Aws { outcome.GetResult().GetBody().flush(); - // Validate checksum if available - if (!ValidateDownloadChecksum(handle)) { - Aws::Client::AWSError checksumError( - Aws::S3::S3Errors::INTERNAL_FAILURE, - "ChecksumMismatch", - "Downloaded file checksum does not match expected checksum", - false); + auto checksumOutcome = ValidateDownloadChecksum(handle); + if (!checksumOutcome.IsSuccess()) { handle->UpdateStatus(TransferStatus::FAILED); - handle->SetError(checksumError); - TriggerErrorCallback(handle, checksumError); + handle->SetError(checksumOutcome.GetError()); + TriggerErrorCallback(handle, checksumOutcome.GetError()); TriggerTransferStatusUpdatedCallback(handle); partState->SetDownloadPartStream(nullptr); return; @@ -1290,7 +1305,12 @@ namespace Aws } else { - handle->UpdateStatus(DetermineIfFailedOrCanceled(*handle)); + auto finalStatus = DetermineIfFailedOrCanceled(*handle); + handle->UpdateStatus(finalStatus); + + if (finalStatus == TransferStatus::FAILED && !handle->GetTempFilePath().empty()) { + Aws::FileSystem::RemoveFileIfExists(handle->GetTempFilePath().c_str()); + } } TriggerTransferStatusUpdatedCallback(handle); } @@ -1641,9 +1661,21 @@ namespace Aws } } - bool TransferManager::ValidateDownloadChecksum(const std::shared_ptr& handle) { - if (handle->GetChecksum().empty() || handle->GetTargetFilePath().empty()) { - return true; + Aws::Utils::Outcome> TransferManager::ValidateDownloadChecksum(const std::shared_ptr& handle) { + if (handle->GetChecksum().empty()) { + if (!handle->GetTempFilePath().empty() && !handle->GetTargetFilePath().empty()) { + if (!Aws::FileSystem::RelocateFileOrDirectory(handle->GetTempFilePath().c_str(), handle->GetTargetFilePath().c_str())) { + AWS_LOGSTREAM_ERROR(CLASS_TAG, "Failed to rename temp file from " << handle->GetTempFilePath() << " to " << handle->GetTargetFilePath()); + Aws::FileSystem::RemoveFileIfExists(handle->GetTempFilePath().c_str()); + return Aws::Client::AWSError(Aws::S3::S3Errors::INTERNAL_FAILURE, "FileRenameError", "Failed to rename temporary file to target", false); + } + } + return Aws::NoResult(); + } + + Aws::String fileToValidate = handle->GetTempFilePath().empty() ? handle->GetTargetFilePath() : handle->GetTempFilePath(); + if (fileToValidate.empty()) { + return Aws::NoResult(); } std::shared_ptr hashCalculator; @@ -1660,13 +1692,16 @@ namespace Aws } auto fileStream = Aws::MakeShared("TransferManager", - handle->GetTargetFilePath().c_str(), + fileToValidate.c_str(), std::ios_base::in | std::ios_base::binary); if (!fileStream->good()) { AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId() - << "] Failed to open downloaded file for checksum validation: " << handle->GetTargetFilePath()); - return false; + << "] Failed to open downloaded file for checksum validation: " << fileToValidate); + if (!handle->GetTempFilePath().empty()) { + Aws::FileSystem::RemoveFileIfExists(handle->GetTempFilePath().c_str()); + } + return Aws::Client::AWSError(Aws::S3::S3Errors::INTERNAL_FAILURE, "FileOpenError", "Failed to open downloaded file for checksum validation", false); } unsigned char buffer[8192]; @@ -1678,7 +1713,10 @@ namespace Aws if (!hashResult.IsSuccess()) { AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId() << "] Failed to calculate checksum for downloaded file"); - return false; + if (!handle->GetTempFilePath().empty()) { + Aws::FileSystem::RemoveFileIfExists(handle->GetTempFilePath().c_str()); + } + return Aws::Client::AWSError(Aws::S3::S3Errors::INTERNAL_FAILURE, "ChecksumCalculationError", "Failed to calculate checksum for downloaded file", false); } Aws::String calculatedChecksum = Aws::Utils::HashingUtils::Base64Encode(hashResult.GetResult()); @@ -1686,10 +1724,22 @@ namespace Aws AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId() << "] Checksum validation failed. Expected: " << handle->GetChecksum() << ", Calculated: " << calculatedChecksum); - return false; + if (!handle->GetTempFilePath().empty()) { + Aws::FileSystem::RemoveFileIfExists(handle->GetTempFilePath().c_str()); + } + return Aws::Client::AWSError(Aws::S3::S3Errors::INTERNAL_FAILURE, "ChecksumMismatch", "Downloaded file checksum does not match expected checksum", false); } - return true; + // Validation succeeded, rename temp to target + if (!handle->GetTempFilePath().empty() && !handle->GetTargetFilePath().empty()) { + if (!Aws::FileSystem::RelocateFileOrDirectory(handle->GetTempFilePath().c_str(), handle->GetTargetFilePath().c_str())) { + AWS_LOGSTREAM_ERROR(CLASS_TAG, "Failed to rename temp file from " << handle->GetTempFilePath() << " to " << handle->GetTargetFilePath()); + Aws::FileSystem::RemoveFileIfExists(handle->GetTempFilePath().c_str()); + return Aws::Client::AWSError(Aws::S3::S3Errors::INTERNAL_FAILURE, "FileRenameError", "Failed to rename temporary file to target", false); + } + } + + return Aws::NoResult(); } } } \ No newline at end of file