@@ -338,6 +338,13 @@ namespace Aws
*/
void SetChecksumForAlgorithm(const std::shared_ptr<PartState>& state, Aws::S3::Model::CompletedPart& part);

/**
* Validates downloaded file checksum against expected checksum from HeadObject.
* @param handle The transfer handle containing checksum and file path.
* @return True if validation passes or there is no checksum to validate; false if validation fails.
*/
bool ValidateDownloadChecksum(const std::shared_ptr<TransferHandle>& handle);

static Aws::String DetermineFilePath(const Aws::String& directory, const Aws::String& prefix, const Aws::String& keyName);

Aws::Utils::ExclusiveOwnershipResourceManager<unsigned char*> m_bufferManager;
97 changes: 97 additions & 0 deletions src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp
@@ -938,6 +938,21 @@ namespace Aws
handle->SetContentType(getObjectOutcome.GetResult().GetContentType());
handle->ChangePartToCompleted(partState, getObjectOutcome.GetResult().GetETag());
getObjectOutcome.GetResult().GetBody().flush();

if (!ValidateDownloadChecksum(handle)) {
Aws::Client::AWSError<Aws::S3::S3Errors> checksumError(
Aws::S3::S3Errors::INTERNAL_FAILURE,
"ChecksumMismatch",
"Downloaded file checksum does not match expected checksum",
false);
handle->ChangePartToFailed(partState);
handle->UpdateStatus(TransferStatus::FAILED);
handle->SetError(checksumError);
TriggerErrorCallback(handle, checksumError);
TriggerTransferStatusUpdatedCallback(handle);
return;
}

handle->UpdateStatus(TransferStatus::COMPLETED);
}
else
@@ -1000,6 +1015,21 @@ namespace Aws
handle->SetContentType(headObjectOutcome.GetResult().GetContentType());
handle->SetMetadata(headObjectOutcome.GetResult().GetMetadata());
handle->SetEtag(headObjectOutcome.GetResult().GetETag());

if (headObjectOutcome.GetResult().GetChecksumType() == S3::Model::ChecksumType::FULL_OBJECT) {
if (!headObjectOutcome.GetResult().GetChecksumCRC32().empty()) {
handle->SetChecksum(headObjectOutcome.GetResult().GetChecksumCRC32());
} else if (!headObjectOutcome.GetResult().GetChecksumCRC32C().empty()) {
handle->SetChecksum(headObjectOutcome.GetResult().GetChecksumCRC32C());
} else if (!headObjectOutcome.GetResult().GetChecksumSHA256().empty()) {
handle->SetChecksum(headObjectOutcome.GetResult().GetChecksumSHA256());
} else if (!headObjectOutcome.GetResult().GetChecksumSHA1().empty()) {
handle->SetChecksum(headObjectOutcome.GetResult().GetChecksumSHA1());
} else if (!headObjectOutcome.GetResult().GetChecksumCRC64NVME().empty()) {
handle->SetChecksum(headObjectOutcome.GetResult().GetChecksumCRC64NVME());
}
}

/* When bucket versioning is suspended, head object will return "null" for unversioned object.
* Send following GetObject with "null" as versionId will result in 403 access denied error if your IAM role or policy
* doesn't have GetObjectVersion permission.
@@ -1240,6 +1270,22 @@ namespace Aws
if (failedParts.size() == 0 && handle->GetBytesTransferred() == handle->GetBytesTotalSize())
{
outcome.GetResult().GetBody().flush();

// Validate checksum if available
if (!ValidateDownloadChecksum(handle)) {
Aws::Client::AWSError<Aws::S3::S3Errors> checksumError(
Aws::S3::S3Errors::INTERNAL_FAILURE,
"ChecksumMismatch",
"Downloaded file checksum does not match expected checksum",
false);
handle->UpdateStatus(TransferStatus::FAILED);
handle->SetError(checksumError);
TriggerErrorCallback(handle, checksumError);
TriggerTransferStatusUpdatedCallback(handle);
partState->SetDownloadPartStream(nullptr);
return;
}

handle->UpdateStatus(TransferStatus::COMPLETED);
}
else
@@ -1594,5 +1640,56 @@ namespace Aws
partFunc->second(part, state->GetChecksum());
}
}

bool TransferManager::ValidateDownloadChecksum(const std::shared_ptr<TransferHandle>& handle) {
Contributor comment:

Whenever we return a bool from a function, we usually want an Aws::Utils::Outcome instead. Rather than simply returning a bare true or false on failure, we should return a proper error describing what happened.
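For illustration, a minimal sketch of that Outcome-based shape; ChecksumValidationOutcome and MakeChecksumOutcome are hypothetical names introduced here, while Aws::Utils::Outcome, Aws::NoResult, and Aws::Client::AWSError are existing core types:

```cpp
#include <aws/core/NoResult.h>
#include <aws/core/client/AWSError.h>
#include <aws/core/utils/Outcome.h>
#include <aws/core/utils/memory/stl/AWSString.h>
#include <aws/s3/S3Errors.h>

// Hypothetical alias: success carries no payload, failure carries a concrete S3 error.
using ChecksumValidationOutcome =
    Aws::Utils::Outcome<Aws::NoResult, Aws::Client::AWSError<Aws::S3::S3Errors>>;

// Compare the expected and calculated checksums; on mismatch, surface a
// non-retryable error with both values instead of a bare false.
ChecksumValidationOutcome MakeChecksumOutcome(const Aws::String& expected,
                                              const Aws::String& calculated)
{
    if (expected.empty() || expected == calculated)
    {
        return ChecksumValidationOutcome(Aws::NoResult());
    }
    return ChecksumValidationOutcome(Aws::Client::AWSError<Aws::S3::S3Errors>(
        Aws::S3::S3Errors::INTERNAL_FAILURE,
        "ChecksumMismatch",
        "Expected " + expected + " but calculated " + calculated,
        /*isRetryable=*/false));
}
```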

if (handle->GetChecksum().empty() || handle->GetTargetFilePath().empty()) {
return true;
}

std::shared_ptr<Aws::Utils::Crypto::Hash> hashCalculator;
if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::CRC32) {
hashCalculator = Aws::MakeShared<Aws::Utils::Crypto::CRC32>("TransferManager");
} else if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::CRC32C) {
hashCalculator = Aws::MakeShared<Aws::Utils::Crypto::CRC32C>("TransferManager");
} else if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::SHA1) {
hashCalculator = Aws::MakeShared<Aws::Utils::Crypto::Sha1>("TransferManager");
} else if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::SHA256) {
hashCalculator = Aws::MakeShared<Aws::Utils::Crypto::Sha256>("TransferManager");
} else {
hashCalculator = Aws::MakeShared<Aws::Utils::Crypto::CRC64>("TransferManager");
}

auto fileStream = Aws::MakeShared<Aws::FStream>("TransferManager",
    handle->GetTargetFilePath().c_str(),
    std::ios_base::in | std::ios_base::binary);

Contributor comment:

So we are opening the file, re-reading the entire stream, and failing the download if the contents differ. That gives me two concerns:

• A file that is corrupted is still written to disk. The spec's failure handling says: "The S3 Transfer Manager SHOULD download to a file in the same directory with .s3tmp.{uniqueIdentifier} (uniqueIdentifier <= 8 chars) suffix to indicate the file is in the process of being downloaded, but not yet complete. After all content has been written, rename the temporary file to the destination file using atomic rename operation. If there is an error, delete the temporary file. The S3 Transfer Manager SHOULD register cleanup handlers to remove the temporary file if the process terminates unexpectedly, if supported. The S3 Transfer Manager MUST NOT pre-validate available space because it can introduce race conditions and false failures." As written, this change would leave a file with failing data integrity at the destination path (a sketch of that temp-file flow follows the diff below).

• We are reading the stream twice. On massive files (think 5 TB) that introduces real overhead. The spec covers this with: "If full object checksum is calculated inline while downloading source, the S3 Transfer Manager MAY buffer full parts in-memory while flushing is blocked on updating the running checksum object." In other words, while we write out to the file we should be updating the checksum (sketched immediately below).
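On the second point, a minimal sketch of what an inline checksum could look like for a single-stream download, assuming the Aws::Utils::Crypto::Hash Update/GetHash interface already used in this change; WriteAndUpdateChecksum is a hypothetical helper, not an existing TransferManager member. Multipart downloads complete parts out of order, which is why the quoted spec allows buffering full parts while the running checksum catches up.

```cpp
#include <aws/core/utils/crypto/Hash.h>
#include <aws/core/utils/memory/stl/AWSStreamFwd.h>
#include <cstddef>

// Hypothetical helper: feed every buffer to the running hash at the moment it
// is written, so the finished file never has to be re-opened for validation.
void WriteAndUpdateChecksum(Aws::OStream& partStream,
                            Aws::Utils::Crypto::Hash& runningHash,
                            unsigned char* buffer,
                            size_t bytesRead)
{
    // Update the running checksum first, then flush the same bytes to the stream.
    runningHash.Update(buffer, bytesRead);
    partStream.write(reinterpret_cast<const char*>(buffer),
                     static_cast<std::streamsize>(bytesRead));
}

// After the final part: runningHash.GetHash() -> Base64Encode -> compare with
// handle->GetChecksum(), all without a second pass over the file.
```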

if (!fileStream->good()) {
AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId()
<< "] Failed to open downloaded file for checksum validation: " << handle->GetTargetFilePath());
return false;
}

unsigned char buffer[8192];
while (fileStream->read(reinterpret_cast<char*>(buffer), sizeof(buffer)) || fileStream->gcount() > 0) {
hashCalculator->Update(buffer, static_cast<size_t>(fileStream->gcount()));
}

auto hashResult = hashCalculator->GetHash();
if (!hashResult.IsSuccess()) {
AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId()
<< "] Failed to calculate checksum for downloaded file");
return false;
}

Aws::String calculatedChecksum = Aws::Utils::HashingUtils::Base64Encode(hashResult.GetResult());
if (calculatedChecksum != handle->GetChecksum()) {
AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId()
<< "] Checksum validation failed. Expected: " << handle->GetChecksum()
<< ", Calculated: " << calculatedChecksum);
return false;
}

return true;
}
}
}
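On the reviewer's first concern, a minimal sketch of the temp-file plus atomic-rename flow described in the quoted spec; BuildTempPath and FinalizeDownload are hypothetical names, and std::rename will not replace an existing destination on Windows, so a real implementation would need a platform-aware rename or the SDK's file-system utilities.

```cpp
#include <aws/core/utils/memory/stl/AWSString.h>
#include <cstdio>

// Hypothetical: build a ".s3tmp.{uniqueIdentifier}" sibling of the destination
// (uniqueIdentifier <= 8 chars per the quoted spec).
Aws::String BuildTempPath(const Aws::String& targetPath, const Aws::String& uniqueId)
{
    return targetPath + ".s3tmp." + uniqueId;
}

// Hypothetical: promote the temp file to the destination only if the checksum
// validated; otherwise delete it so no corrupt data is left behind.
bool FinalizeDownload(const Aws::String& tempPath,
                      const Aws::String& targetPath,
                      bool checksumValid)
{
    if (!checksumValid)
    {
        std::remove(tempPath.c_str());
        return false;
    }
    return std::rename(tempPath.c_str(), targetPath.c_str()) == 0;
}
```

Validating the checksum against the temporary file before the rename keeps a corrupt download from ever appearing at the destination path.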