diff --git a/.clang-tidy b/.clang-tidy index 0ddce2e..fb0fa6f 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -4,6 +4,7 @@ Checks: > bugprone-*, -bugprone-easily-swappable-parameters, clang-analyzer-*, + -clang-analyzer-optin.cplusplus.VirtualCall, clang-diagnostic-*, cppcore-guidelines-*, google-*, diff --git a/CHANGELOG.md b/CHANGELOG.md index 27fc3e2..c895ac7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 0.45.0 - 2025-12-10 + +### Enhancements +- Added download retry, resumption, and checksum verification to + `Historical::BatchDownload` +- Added new venue, dataset, and publisher for Cboe Futures Exchange (`XCBF.PITCH`) +- Upgraded default `httplib` version to 0.28.0 + ## 0.44.0 - 2025-11-18 ### Enhancements diff --git a/CMakeLists.txt b/CMakeLists.txt index fdf5864..48f7b7d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 3.24..4.0) project( databento - VERSION 0.44.0 + VERSION 0.45.0 LANGUAGES CXX DESCRIPTION "Official Databento client library" ) @@ -128,6 +128,10 @@ endif() find_package(OpenSSL REQUIRED) find_package(zstd REQUIRED) +if (APPLE) + find_library(CORE_FOUNDATION_LIB CoreFoundation REQUIRED) + find_library(CFNETWORK_LIB CFNetwork REQUIRED) +endif() if(NOT TARGET zstd::libzstd) if(TARGET zstd::libzstd_shared) add_library(zstd::libzstd ALIAS zstd::libzstd_shared) @@ -178,7 +182,7 @@ if(${PROJECT_NAME_UPPERCASE}_USE_EXTERNAL_HTTPLIB) find_package(httplib REQUIRED) endif() else() - set(httplib_version 0.20.0) + set(httplib_version 0.28.0) FetchContent_Declare( httplib URL https://github.com/yhirose/cpp-httplib/archive/refs/tags/v${httplib_version}.tar.gz @@ -223,6 +227,9 @@ target_link_libraries( OpenSSL::SSL Threads::Threads zstd::libzstd + # macOS-specific libraries required by httplib + $<$<PLATFORM_ID:Darwin>:${CFNETWORK_LIB}> + $<$<PLATFORM_ID:Darwin>:${CORE_FOUNDATION_LIB}> ) target_compile_definitions( diff --git a/cmake/SourcesAndHeaders.cmake b/cmake/SourcesAndHeaders.cmake index 2c51ce8..48698d5 100644 ---
a/cmake/SourcesAndHeaders.cmake +++ b/cmake/SourcesAndHeaders.cmake @@ -13,6 +13,7 @@ set(headers include/databento/detail/json_helpers.hpp include/databento/detail/scoped_fd.hpp include/databento/detail/scoped_thread.hpp + include/databento/detail/sha256_hasher.hpp include/databento/detail/tcp_client.hpp include/databento/detail/zstd_stream.hpp include/databento/enums.hpp @@ -54,6 +55,7 @@ set(sources src/detail/http_client.cpp src/detail/json_helpers.cpp src/detail/scoped_fd.cpp + src/detail/sha256_hasher.cpp src/detail/tcp_client.cpp src/detail/zstd_stream.cpp src/enums.cpp diff --git a/cmake/StandardSettings.cmake b/cmake/StandardSettings.cmake index 5e90442..51938d0 100644 --- a/cmake/StandardSettings.cmake +++ b/cmake/StandardSettings.cmake @@ -2,6 +2,7 @@ # Project settings # +option(${PROJECT_NAME_UPPERCASE}_USE_EXTERNAL_DATE "Use an external date library" OFF) option(${PROJECT_NAME_UPPERCASE}_USE_EXTERNAL_JSON "Use an external JSON library" OFF) option(${PROJECT_NAME_UPPERCASE}_USE_EXTERNAL_HTTPLIB "Use an external httplib library" OFF) option(${PROJECT_NAME_UPPERCASE}_USE_EXTERNAL_GTEST "Use an external google test (gtest) library" ON) diff --git a/include/databento/detail/http_client.hpp b/include/databento/detail/http_client.hpp index bb1f8cc..0b9df4e 100644 --- a/include/databento/detail/http_client.hpp +++ b/include/databento/detail/http_client.hpp @@ -23,7 +23,7 @@ class HttpClient { nlohmann::json GetJson(const std::string& path, const httplib::Params& params); nlohmann::json PostJson(const std::string& path, const httplib::Params& form_params); - void GetRawStream(const std::string& path, const httplib::Params& params, + void GetRawStream(const std::string& path, const httplib::Headers& headers, const httplib::ContentReceiver& callback); void PostRawStream(const std::string& path, const httplib::Params& form_params, const httplib::ContentReceiver& callback); diff --git a/include/databento/detail/sha256_hasher.hpp 
b/include/databento/detail/sha256_hasher.hpp new file mode 100644 index 0000000..bec508a --- /dev/null +++ b/include/databento/detail/sha256_hasher.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include // byte, size_t +#include // unique_ptr +#include +#include + +// Forward declaration +struct evp_md_ctx_st; +using EVP_MD_CTX = evp_md_ctx_st; + +namespace databento::detail { +// One-off hash +std::string Sha256Hash(std::string_view data); + +class Sha256Hasher { + public: + Sha256Hasher(); + + void Update(const std::byte* buffer, std::size_t length); + std::string Finalize(); + + private: + std::unique_ptr<::EVP_MD_CTX, void (*)(::EVP_MD_CTX*)> ctx_; +}; +} // namespace databento::detail diff --git a/include/databento/file_stream.hpp b/include/databento/file_stream.hpp index 5eaf87f..9f9e011 100644 --- a/include/databento/file_stream.hpp +++ b/include/databento/file_stream.hpp @@ -25,6 +25,7 @@ class InFileStream : public IReadable { class OutFileStream : public IWritable { public: explicit OutFileStream(const std::filesystem::path& file_path); + OutFileStream(const std::filesystem::path& file_path, std::ios_base::openmode mode); void WriteAll(const std::byte* buffer, std::size_t length) override; diff --git a/include/databento/historical.hpp b/include/databento/historical.hpp index 0c5474a..d0a4850 100644 --- a/include/databento/historical.hpp +++ b/include/databento/historical.hpp @@ -4,6 +4,7 @@ #include #include // multimap #include +#include #include #include "databento/batch.hpp" // BatchJob @@ -225,7 +226,8 @@ class Historical { std::string user_agent_ext); BatchJob BatchSubmitJob(const HttplibParams& params); - void DownloadFile(const std::string& url, const std::filesystem::path& output_path); + void DownloadFile(const std::string& url, const std::filesystem::path& output_path, + std::string_view hash, std::uint64_t exp_size); std::vector BatchListJobs(const HttplibParams& params); std::vector MetadataGetDatasetCondition( const HttplibParams& params); diff --git 
a/include/databento/publishers.hpp b/include/databento/publishers.hpp index 3e34643..3826ffb 100644 --- a/include/databento/publishers.hpp +++ b/include/databento/publishers.hpp @@ -109,6 +109,8 @@ enum class Venue : std::uint16_t { Xeur = 50, // European Energy Exchange Xeee = 51, + // Cboe Futures Exchange + Xcbf = 52, }; // A source of data. @@ -191,6 +193,8 @@ enum class Dataset : std::uint16_t { XeurEobi = 38, // European Energy Exchange EOBI XeeeEobi = 39, + // Cboe Futures Exchange PITCH + XcbfPitch = 40, }; // A specific Venue from a specific data source. @@ -403,6 +407,8 @@ enum class Publisher : std::uint16_t { XeurEobiXoff = 103, // European Energy Exchange EOBI - Off-Market Trades XeeeEobiXoff = 104, + // Cboe Futures Exchange + XcbfPitchXcbf = 105, }; // Get a Publisher's Venue. diff --git a/pkg/PKGBUILD b/pkg/PKGBUILD index 7178466..c4f90c8 100644 --- a/pkg/PKGBUILD +++ b/pkg/PKGBUILD @@ -1,7 +1,7 @@ # Maintainer: Databento _pkgname=databento-cpp pkgname=databento-cpp-git -pkgver=0.44.0 +pkgver=0.45.0 pkgrel=1 pkgdesc="Official C++ client for Databento" arch=('any') diff --git a/src/detail/http_client.cpp b/src/detail/http_client.cpp index 8921a83..9d257bc 100644 --- a/src/detail/http_client.cpp +++ b/src/detail/http_client.cpp @@ -46,13 +46,12 @@ nlohmann::json HttpClient::PostJson(const std::string& path, return HttpClient::CheckAndParseResponse(path, std::move(res)); } -void HttpClient::GetRawStream(const std::string& path, const httplib::Params& params, +void HttpClient::GetRawStream(const std::string& path, const httplib::Headers& headers, const httplib::ContentReceiver& callback) { - const std::string full_path = httplib::append_query_params(path, params); std::string err_body{}; int err_status{}; const httplib::Result res = client_.Get( - full_path, MakeStreamResponseHandler(err_status), + path, headers, MakeStreamResponseHandler(err_status), [&callback, &err_body, &err_status](const char* data, std::size_t length) { // if an error response was 
received, read all content into // err_body diff --git a/src/detail/sha256_hasher.cpp b/src/detail/sha256_hasher.cpp new file mode 100644 index 0000000..0f9d280 --- /dev/null +++ b/src/detail/sha256_hasher.cpp @@ -0,0 +1,45 @@ +#include "databento/detail/sha256_hasher.hpp" + +#include <openssl/evp.h> + +#include <array> +#include <iomanip> // hex, setw, setfill +#include <sstream> + +#include "databento/exceptions.hpp" + +std::string databento::detail::Sha256Hash(std::string_view data) { + Sha256Hasher hasher{}; + hasher.Update(reinterpret_cast<const std::byte*>(data.data()), data.length()); + return hasher.Finalize(); +} + +using databento::detail::Sha256Hasher; + +Sha256Hasher::Sha256Hasher() : ctx_{::EVP_MD_CTX_new(), &::EVP_MD_CTX_free} { + // EVP_MD_CTX_new returns null on allocation failure and EVP_DigestInit_ex + // returns 0 on error; fail here instead of in a later Update or Finalize + if (!ctx_ || !::EVP_DigestInit_ex(ctx_.get(), ::EVP_sha256(), nullptr)) { + throw databento::Exception{"Failed to initialize SHA256 digest"}; + } +} + +void Sha256Hasher::Update(const std::byte* buffer, std::size_t length) { + if (!::EVP_DigestUpdate(ctx_.get(), buffer, length)) { + throw databento::Exception{"Failed to update SHA256 digest"}; + } +} + +std::string Sha256Hasher::Finalize() { + std::array<unsigned char, EVP_MAX_MD_SIZE> hash{}; + unsigned int hash_length = 0; + if (!::EVP_DigestFinal_ex(ctx_.get(), hash.data(), &hash_length)) { + throw databento::Exception{"Failed to finalize SHA256 digest"}; + } + + std::ostringstream hash_hex_stream; + for (size_t i = 0; i < hash_length; ++i) { + hash_hex_stream << std::hex << std::setw(2) << std::setfill('0') << +hash[i]; + } + return hash_hex_stream.str(); +} diff --git a/src/file_stream.cpp b/src/file_stream.cpp index 8ffb0dd..9782dcb 100644 --- a/src/file_stream.cpp +++ b/src/file_stream.cpp @@ -11,7 +11,7 @@ InFileStream::InFileStream(const std::filesystem::path& file_path) : stream_{file_path, std::ios::binary} { if (stream_.fail()) { throw InvalidArgumentError{"InFileStream", "file_path", - "Non-existent or invalid file at " + file_path.string()}; + "Non-existent or invalid file: " + file_path.string()}; } } @@ -33,10 +33,15 @@ std::size_t InFileStream::ReadSome(std::byte* buffer, std::size_t max_length) { using databento::OutFileStream;
OutFileStream::OutFileStream(const std::filesystem::path& file_path) - : stream_{file_path, std::ios::binary} { + : OutFileStream{file_path, std::ios::binary} {} + +OutFileStream::OutFileStream(const std::filesystem::path& file_path, + std::ios_base::openmode mode) + : stream_{file_path, mode} { if (stream_.fail()) { - throw InvalidArgumentError{"OutFileStream", "file_path", - "Non-existent or invalid file"}; + throw InvalidArgumentError{ + "OutFileStream", "file_path", + "Can't open file for writing at path: " + file_path.string()}; } } diff --git a/src/historical.cpp b/src/historical.cpp index 34fe9ce..ecf2589 100644 --- a/src/historical.cpp +++ b/src/historical.cpp @@ -9,18 +9,20 @@ #include // find_if #include // size_t #include // get_env -#include -#include // back_inserter +#include // openmode +#include // back_inserter +#include #include -#include #include #include // move +#include #include "databento/constants.hpp" #include "databento/datetime.hpp" #include "databento/dbn_file_store.hpp" #include "databento/detail/dbn_buffer_decoder.hpp" #include "databento/detail/json_helpers.hpp" +#include "databento/detail/sha256_hasher.hpp" #include "databento/enums.hpp" #include "databento/exceptions.hpp" // Exception, JsonResponseError #include "databento/file_stream.hpp" @@ -118,6 +120,76 @@ void TryCreateDir(const std::filesystem::path& dir_name) { throw databento::Exception{"Unable to create directory "s + dir_name.generic_string() + ": " + ec.message()}; } + +struct AlreadyDownloaded {}; +using FileExistsResult = std::variant>; + +FileExistsResult CheckIfFileExists( + databento::ILogReceiver* log_receiver, const std::filesystem::path& output_path, + std::uint64_t exp_size, std::optional& hasher) { + static constexpr auto kMethod = "Historical::CheckIfFileExists"; + std::error_code ec{}; + const auto actual_size = std::filesystem::file_size(output_path, ec); + if (ec) { + return std::nullopt; + } + if (actual_size < exp_size) { + if 
(log_receiver->ShouldLog(databento::LogLevel::Debug)) { + std::ostringstream log; + log << '[' << kMethod << "] Found existing file, resuming download to " + << output_path << ", previously downloaded " << actual_size + << " bytes, total: " << exp_size << " bytes"; + log_receiver->Receive(databento::LogLevel::Debug, log.str()); + } + if (hasher) { + // Hash previously downloaded data + std::vector buf(1 << 23, std::byte{}); + databento::InFileStream existing_file{output_path}; + while (auto read_size = existing_file.ReadSome(buf.data(), buf.size())) { + hasher->Update(buf.data(), read_size); + } + } + + return httplib::Range{actual_size, -1}; + } + if (actual_size == exp_size) { + if (log_receiver->ShouldLog(databento::LogLevel::Debug)) { + std::ostringstream log; + log << '[' << kMethod << "] Skipping download as file at " << output_path + << " already exists and matches expected size"; + log_receiver->Receive(databento::LogLevel::Debug, log.str()); + } + return AlreadyDownloaded{}; + } + std::ostringstream err; + err << "Batch file " << output_path << " already exists with size " << actual_size + << " which is larger than expected size " << exp_size; + throw databento::Exception{err.str()}; +} + +void VerifyHash(databento::ILogReceiver* log_receiver, + std::optional& hasher, + std::string_view exp_hash) { + static constexpr auto kMethod = "Historical::VerifyHash"; + + if (!hasher) { + return; + } + const auto hash = hasher->Finalize(); + if (hash == exp_hash) { + if (log_receiver->ShouldLog(databento::LogLevel::Debug)) { + std::ostringstream log; + log << '[' << kMethod << "] Successfully verified checksum"; + log_receiver->Receive(databento::LogLevel::Debug, log.str()); + } + } else { + std::ostringstream log; + log << '[' << kMethod + << "] Downloaded file failed checksum verification, hash: " << hash + << " expected: " << exp_hash; + log_receiver->Receive(databento::LogLevel::Warning, log.str()); + } +} } // namespace databento::HistoricalBuilder 
Historical::Builder() { @@ -296,7 +368,7 @@ std::vector Historical::BatchDownload( std::vector paths; for (const auto& file_desc : file_descs) { std::filesystem::path output_path = job_dir / file_desc.filename; - DownloadFile(file_desc.https_url, output_path); + DownloadFile(file_desc.https_url, output_path, file_desc.hash, file_desc.size); paths.emplace_back(std::move(output_path)); } return paths; @@ -318,45 +390,105 @@ std::filesystem::path Historical::BatchDownload( "Filename not found for batch job " + job_id}; } std::filesystem::path output_path = job_dir / file_desc_it->filename; - DownloadFile(file_desc_it->https_url, output_path); + DownloadFile(file_desc_it->https_url, output_path, file_desc_it->hash, + file_desc_it->size); return output_path; } void Historical::DownloadFile(const std::string& url, - const std::filesystem::path& output_path) { - static const std::string kMethod = "Historical::DownloadFile"; + const std::filesystem::path& output_path, + std::string_view hash, std::uint64_t exp_size) { + static constexpr auto kMethod = "Historical::DownloadFile"; // extract path from URL - const auto protocol_divider = url.find("://"); - std::string path; - - if (protocol_divider == std::string::npos) { - const auto slash = url.find_first_of('/'); - if (slash == std::string::npos) { - throw InvalidArgumentError{kMethod, "url", "No slashes"}; + const std::string path = [&url] { + const auto protocol_divider = url.find("://"); + if (protocol_divider == std::string::npos) { + const auto slash = url.find_first_of('/'); + if (slash == std::string::npos) { + throw InvalidArgumentError{kMethod, "url", "No slashes"}; + } + return url.substr(slash); + } else { + const auto slash = url.find('/', protocol_divider + 3); + if (slash == std::string::npos) { + throw InvalidArgumentError{kMethod, "url", "No slashes"}; + } + return url.substr(slash); } - path = url.substr(slash); + }(); + + const auto delimiter_idx = hash.find(':'); + if (delimiter_idx == std::string::npos) 
{ + throw databento::Exception{std::string{"Unexpected hash string format: "} + + std::string{hash}}; + } + const auto hash_algo = hash.substr(0, delimiter_idx); + const auto exp_hash = hash.substr(delimiter_idx + 1); + std::optional hasher{}; + if (hash_algo == "sha256") { + hasher = detail::Sha256Hasher{}; } else { - const auto slash = url.find('/', protocol_divider + 3); - if (slash == std::string::npos) { - throw InvalidArgumentError{kMethod, "url", "No slashes"}; - } - path = url.substr(slash); + log_receiver_->Receive( + LogLevel::Warning, + "Skipping checksum with unsupported hash algorithm " + std::string{hash_algo}); } + std::ostringstream ss; ss << '[' << kMethod << "] Downloading batch file " << path << " to " << output_path; log_receiver_->Receive(LogLevel::Info, ss.str()); - OutFileStream out_file{output_path}; - this->client_.GetRawStream( - path, {}, [&out_file](const char* data, std::size_t length) { - out_file.WriteAll(reinterpret_cast(data), length); - return true; - }); + constexpr auto kMaxRetries = 5; + auto retry = 0; + while (true) { + const auto exists_res = + ::CheckIfFileExists(log_receiver_, output_path, exp_size, hasher); + if (std::holds_alternative(exists_res)) { + return; + } + httplib::Headers http_headers; + const auto opt_range = std::get>(exists_res); + std::ios::openmode mode = std::ios::binary; + if (opt_range) { + auto [key, val] = httplib::make_range_header({*opt_range}); + http_headers.emplace(std::move(key), std::move(val)); + // `ate` **and** `in` required to properly append + mode |= std::ios::in | std::ios::ate; + } + OutFileStream out_file{output_path, mode}; + try { + this->client_.GetRawStream( + path, http_headers, + [&hasher, &out_file](const char* data, std::size_t length) { + const auto bytes = reinterpret_cast(data); + if (hasher) { + hasher->Update(bytes, length); + } + out_file.WriteAll(bytes, length); + return true; + }); + } catch (const databento::Exception& exc) { + retry += 1; + if (retry == kMaxRetries) { 
+ throw; // rethrow in place: `throw exc;` would copy and slice derived exception types + } + ss.str(""); + ss << '[' << kMethod << "] Retrying download attempt " << retry + 1 << " after " + << exc.what(); + log_receiver_->Receive(LogLevel::Error, ss.str()); + // reset hasher + if (hasher) { + hasher = detail::Sha256Hasher{}; + } + continue; + } - if (log_receiver_->ShouldLog(LogLevel::Debug)) { - ss.str(""); - ss << '[' << kMethod << ']' << " Completed download of " << path; - log_receiver_->Receive(LogLevel::Debug, ss.str()); + if (log_receiver_->ShouldLog(LogLevel::Debug)) { + ss.str(""); + ss << '[' << kMethod << ']' << " Completed download of " << path; + log_receiver_->Receive(LogLevel::Debug, ss.str()); + } + ::VerifyHash(log_receiver_, hasher, exp_hash); + return; } } diff --git a/src/live_blocking.cpp b/src/live_blocking.cpp index f7e2f01..8f9e9cb 100644 --- a/src/live_blocking.cpp +++ b/src/live_blocking.cpp @@ -1,19 +1,17 @@ #include "databento/live_blocking.hpp" -#include // SHA256, SHA256_DIGEST_LENGTH - #include // copy #include // tolower #include #include // ptrdiff_t #include -#include // hex, setfill, setw #include #include #include #include "databento/constants.hpp" // kApiKeyLength #include "databento/dbn_decoder.hpp" +#include "databento/detail/sha256_hasher.hpp" #include "databento/detail/tcp_client.hpp" #include "databento/exceptions.hpp" // LiveApiError #include "databento/live.hpp" // LiveBuilder @@ -320,20 +318,9 @@ std::uint64_t LiveBlocking::Authenticate() { } std::string LiveBlocking::GenerateCramReply(std::string_view challenge_key) { - std::array sha{}; - const unsigned char* sha_res = - ::SHA256(reinterpret_cast(challenge_key.data()), - challenge_key.size(), sha.data()); - if (sha_res == nullptr) { - throw LiveApiError{"Unable to generate SHA 256"}; - } - std::ostringstream auth_stream; - for (const unsigned char c : sha) { - auth_stream << std::hex << std::setw(2) << std::setfill('0') - << static_cast(c); - } - auth_stream << '-' << key_.substr(kApiKeyLength - kBucketIdLength); + auth_stream <<
detail::Sha256Hash(challenge_key) << '-' + << key_.substr(kApiKeyLength - kBucketIdLength); return auth_stream.str(); } diff --git a/src/publishers.cpp b/src/publishers.cpp index da981cb..4e21ff1 100644 --- a/src/publishers.cpp +++ b/src/publishers.cpp @@ -164,6 +164,9 @@ const char* ToString(Venue venue) { case Venue::Xeee: { return "XEEE"; } + case Venue::Xcbf: { + return "XCBF"; + } default: { return "Unknown"; } @@ -330,6 +333,9 @@ Venue FromString(const std::string& str) { if (str == "XEEE") { return Venue::Xeee; } + if (str == "XCBF") { + return Venue::Xcbf; + } throw InvalidArgumentError{"FromString", "str", "unknown value '" + str + '\''}; } @@ -453,6 +459,9 @@ const char* ToString(Dataset dataset) { case Dataset::XeeeEobi: { return "XEEE.EOBI"; } + case Dataset::XcbfPitch: { + return "XCBF.PITCH"; + } default: { return "Unknown"; } @@ -583,6 +592,9 @@ Dataset FromString(const std::string& str) { if (str == "XEEE.EOBI") { return Dataset::XeeeEobi; } + if (str == "XCBF.PITCH") { + return Dataset::XcbfPitch; + } throw InvalidArgumentError{"FromString", "str", "unknown value '" + str + '\''}; } @@ -901,6 +913,9 @@ Venue PublisherVenue(Publisher publisher) { case Publisher::XeeeEobiXoff: { return Venue::Xoff; } + case Publisher::XcbfPitchXcbf: { + return Venue::Xcbf; + } default: { throw InvalidArgumentError{ "PublisherVenue", "publisher", @@ -1223,6 +1238,9 @@ Dataset PublisherDataset(Publisher publisher) { case Publisher::XeeeEobiXoff: { return Dataset::XeeeEobi; } + case Publisher::XcbfPitchXcbf: { + return Dataset::XcbfPitch; + } default: { throw InvalidArgumentError{ "PublisherDataset", "publisher", @@ -1546,6 +1564,9 @@ const char* ToString(Publisher publisher) { case Publisher::XeeeEobiXoff: { return "XEEE.EOBI.XOFF"; } + case Publisher::XcbfPitchXcbf: { + return "XCBF.PITCH.XCBF"; + } default: { return "Unknown"; } @@ -1871,6 +1892,9 @@ Publisher FromString(const std::string& str) { if (str == "XEEE.EOBI.XOFF") { return Publisher::XeeeEobiXoff; } + if 
(str == "XCBF.PITCH.XCBF") { + return Publisher::XcbfPitchXcbf; + } throw InvalidArgumentError{"FromString", "str", "unknown value '" + str + '\''}; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5f6bb50..e4ad596 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -48,6 +48,7 @@ set( src/pretty_tests.cpp src/record_tests.cpp src/scoped_thread_tests.cpp + src/sha256_hasher_tests.cpp src/stream_op_helper_tests.cpp src/symbol_map_tests.cpp src/symbology_tests.cpp diff --git a/tests/include/mock/mock_http_server.hpp b/tests/include/mock/mock_http_server.hpp index 10e512f..0189348 100644 --- a/tests/include/mock/mock_http_server.hpp +++ b/tests/include/mock/mock_http_server.hpp @@ -35,9 +35,6 @@ class MockHttpServer { void MockPostJson(const std::string& path, const std::map& params, const nlohmann::json& json); - void MockGetDbn(const std::string& path, - const std::map& params, - const std::string& dbn_path); void MockPostDbn(const std::string& path, const std::map& params, const std::string& dbn_path); @@ -48,6 +45,10 @@ class MockHttpServer { const std::map& params, Record record, std::size_t count, std::size_t extra_bytes, std::size_t chunk_size); + void MockGetDbnFile(const std::string& path, + + const std::string& dbn_path); + private: using SharedConstBuffer = std::shared_ptr; diff --git a/tests/include/temp_file.hpp b/tests/include/temp_file.hpp index d06bd77..3a7c3d9 100644 --- a/tests/include/temp_file.hpp +++ b/tests/include/temp_file.hpp @@ -11,12 +11,10 @@ #include "databento/exceptions.hpp" namespace databento { -// A RAII for creating a file on construction and removing it when the class -// goes out of scope. +// A RAII class removing a file when the object goes out of scope. 
class TempFile { public: explicit TempFile(std::filesystem::path path) : path_{std::move(path)} { - std::ifstream f{path_}; if (Exists()) { throw InvalidArgumentError{ "TempFile::TempFile", "path", diff --git a/tests/src/historical_tests.cpp b/tests/src/historical_tests.cpp index 19afef8..9ada439 100644 --- a/tests/src/historical_tests.cpp +++ b/tests/src/historical_tests.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include "databento/dbn_file_store.hpp" #include "databento/enums.hpp" #include "databento/exceptions.hpp" // Exception +#include "databento/file_stream.hpp" #include "databento/historical.hpp" #include "databento/log.hpp" #include "databento/metadata.hpp" @@ -223,14 +225,18 @@ TEST_F(HistoricalTests, TestBatchListFiles) { static const nlohmann::json kListFilesResp{ {{"filename", "test.dbn"}, - {"size", {}}, - {"hash", {}}, + // size of test_data.mbo.v3.dbn + {"size", 472}, + {"hash", + // $ sha256 test_data.mbo.v3.dbn + "sha256:d2a526d952f845ab8f03ceac42ec828a2541e0b5e2e8a64df2e8bbaa1e898184"}, {"urls", {{"https", "https://api.databento.com/v0/job_id/test.dbn"}, {"ftp", "ftp://ftp.databento.com/job_id/test.dbn"}}}}, {{"filename", "test_metadata.json"}, - {"size", {}}, - {"hash", {}}, + {"size", 15}, + {"hash", + "sha256:e43abcf3375244839c012f9633f95862d232a95b00d5bc7348b3098b9fed7f32"}, {"urls", {{"https", "https://api.databento.com/v0/job_id/test_metadata.json"}, {"ftp", "ftp://ftp.databento.com/job_id/test_metadata.json"}}}}}; @@ -241,14 +247,12 @@ TEST_F(HistoricalTests, TestBatchDownloadAll) { const TempFile temp_dbn_file{tmp_path_ / "job123/test.dbn"}; mock_server_.MockGetJson("/v0/batch.list_files", {{"job_id", kJobId}}, kListFilesResp); - mock_server_.MockGetDbn("/v0/job_id/test.dbn", {}, - TEST_DATA_DIR "/test_data.mbo.v3.dbn"); + mock_server_.MockGetDbnFile("/v0/job_id/test.dbn", + TEST_DATA_DIR "/test_data.mbo.v3.dbn"); mock_server_.MockGetJson("/v0/job_id/test_metadata.json", {{"key", 
"value"}}); const auto port = mock_server_.ListenOnThread(); databento::Historical target = Client(port); - ASSERT_FALSE(temp_metadata_file.Exists()); - ASSERT_FALSE(temp_dbn_file.Exists()); const std::vector paths = target.BatchDownload(tmp_path_, kJobId); EXPECT_TRUE(temp_metadata_file.Exists()); @@ -278,13 +282,38 @@ TEST_F(HistoricalTests, TestBatchDownloadSingle) { const auto port = mock_server_.ListenOnThread(); databento::Historical target = Client(port); - ASSERT_FALSE(temp_metadata_file.Exists()); const std::filesystem::path path = target.BatchDownload(tmp_path_, kJobId, "test_metadata.json"); EXPECT_TRUE(temp_metadata_file.Exists()); EXPECT_EQ(path.lexically_normal(), temp_metadata_file.Path().lexically_normal()); } +TEST_F(HistoricalTests, TestBatchDownloadResume) { + const auto kJobId = "job123"; + const TempFile temp_dbn_file{tmp_path_ / "job123/test.dbn"}; + const auto source_path = TEST_DATA_DIR "/test_data.mbo.v3.dbn"; + mock_server_.MockGetJson("/v0/batch.list_files", {{"job_id", kJobId}}, + kListFilesResp); + mock_server_.MockGetDbnFile("/v0/job_id/test.dbn", source_path); + // Copy some of the file + { + InFileStream source{source_path}; + OutFileStream partial_dbn_file{temp_dbn_file.Path()}; + std::array buf; + source.ReadExact(buf.data(), buf.size()); + partial_dbn_file.WriteAll(buf.data(), buf.size()); + } + ASSERT_EQ(std::filesystem::file_size(temp_dbn_file.Path()), 50); + const auto port = mock_server_.ListenOnThread(); + databento::Historical target = Client(port); + const std::filesystem::path path = + target.BatchDownload(tmp_path_, kJobId, "test.dbn"); + EXPECT_EQ(path.lexically_normal(), temp_dbn_file.Path().lexically_normal()); + // SHA verification will happen within `BatchDownload`, but check sizes as a sanity + // check + EXPECT_EQ(std::filesystem::file_size(path), std::filesystem::file_size(source_path)); +} + TEST_F(HistoricalTests, TestBatchDownloadSingleInvalidFile) { const auto kJobId = "654"; 
mock_server_.MockGetJson("/v0/batch.list_files", {{"job_id", kJobId}}, diff --git a/tests/src/mock_http_server.cpp b/tests/src/mock_http_server.cpp index 5947694..a9e44e9 100644 --- a/tests/src/mock_http_server.cpp +++ b/tests/src/mock_http_server.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include "databento/constants.hpp" @@ -75,18 +76,6 @@ void MockHttpServer::MockPostJson(const std::string& path, }); } -void MockHttpServer::MockGetDbn(const std::string& path, - const std::map& params, - const std::string& dbn_path) { - constexpr std::size_t kChunkSize = 32; - - // Read contents into buffer - auto buffer = EncodeToBuffer(dbn_path); - - // Serve - server_.Get(path, MakeDbnStreamHandler(params, std::move(buffer), kChunkSize)); -} - void MockHttpServer::MockPostDbn(const std::string& path, const std::map& params, const std::string& dbn_path) { @@ -134,6 +123,17 @@ void MockHttpServer::MockPostDbn(const std::string& path, server_.Post(path, MakeDbnStreamHandler(params, std::move(buffer), chunk_size)); } +void MockHttpServer::MockGetDbnFile(const std::string& path, + const std::string& dbn_path) { + server_.Get(path, [dbn_path](const httplib::Request& req, httplib::Response& resp) { + if (!req.has_header("Authorization")) { + resp.status = 401; + return; + } + resp.set_file_content(dbn_path, "application/octet-stream"); + }); +} + void MockHttpServer::CheckParams(const std::map& params, const httplib::Request& req) { for (const auto& param : params) { diff --git a/tests/src/sha256_hasher_tests.cpp b/tests/src/sha256_hasher_tests.cpp new file mode 100644 index 0000000..25a8b55 --- /dev/null +++ b/tests/src/sha256_hasher_tests.cpp @@ -0,0 +1,25 @@ +#include + +#include + +#include "databento/detail/sha256_hasher.hpp" + +namespace databento::detail::tests { +TEST(Sha256HasherTests, SanityCheck) { + ASSERT_EQ(Sha256Hash("123DBN\n"), + // obtained with `echo 123DBN | sha256sum` + "f15e88dd823646fb031d871850a352bf081d0686933dee4c5d9ff3c376f15ea7"); +} + 
+TEST(Sha256HasherTests, Equivalence) { + const auto one_shot = Sha256Hash("1234567890"); + Sha256Hasher hasher; + auto update = [&hasher](std::string_view s) { + hasher.Update(reinterpret_cast(s.data()), s.length()); + }; + update("123"); + update("4567"); + update("890"); + ASSERT_EQ(hasher.Finalize(), one_shot); +} +} // namespace databento::detail::tests