diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index b06d213..496d87c 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -19,7 +19,6 @@ jobs: extension_name: httpfs duckdb_version: v1.3-ossivalis ci_tools_version: main - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads' duckdb-stable-deploy: @@ -32,4 +31,3 @@ jobs: duckdb_version: v1.3-ossivalis ci_tools_version: main deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads' diff --git a/.github/workflows/MinioTests.yml b/.github/workflows/MinioTests.yml index dca4c52..467b5b3 100644 --- a/.github/workflows/MinioTests.yml +++ b/.github/workflows/MinioTests.yml @@ -10,7 +10,7 @@ defaults: jobs: minio-tests: name: Minio Tests - runs-on: ubuntu-24.04 + runs-on: ubuntu-latest env: S3_TEST_SERVER_AVAILABLE: 1 AWS_DEFAULT_REGION: eu-west-1 @@ -18,7 +18,9 @@ jobs: AWS_SECRET_ACCESS_KEY: minio_duckdb_user_password DUCKDB_S3_ENDPOINT: duckdb-minio.com:9000 DUCKDB_S3_USE_SSL: false + CORE_EXTENSIONS: 'parquet;json' GEN: ninja + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake VCPKG_TARGET_TRIPLET: x64-linux steps: diff --git a/CMakeLists.txt b/CMakeLists.txt index 92d4547..133ced9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,16 +9,24 @@ add_extension_definitions() include_directories(extension/httpfs/include ${DUCKDB_MODULE_BASE_DIR}/third_party/httplib) +if (NOT EMSCRIPTEN) + set(EXTRA_SOURCES extension/httpfs/crypto.cpp extension/httpfs/httpfs_httplib_client.cpp extension/httpfs/httpfs_curl_client.cpp) + add_definitions(-DOVERRIDE_ENCRYPTION_UTILS=1) +else() + set(EXTRA_SOURCES extension/httpfs/httpfs_client_wasm.cpp) +endif() + build_static_extension( httpfs extension/httpfs/hffs.cpp extension/httpfs/s3fs.cpp extension/httpfs/httpfs.cpp - 
extension/httpfs/httpfs_client.cpp extension/httpfs/http_state.cpp extension/httpfs/crypto.cpp + extension/httpfs/hash_functions.cpp extension/httpfs/create_secret_functions.cpp - extension/httpfs/httpfs_extension.cpp) + extension/httpfs/httpfs_extension.cpp + ${EXTRA_SOURCES} ) set(PARAMETERS "-warnings") build_loadable_extension( @@ -27,18 +35,21 @@ build_loadable_extension( extension/httpfs/hffs.cpp extension/httpfs/s3fs.cpp extension/httpfs/httpfs.cpp - extension/httpfs/httpfs_client.cpp extension/httpfs/http_state.cpp extension/httpfs/crypto.cpp + extension/httpfs/hash_functions.cpp extension/httpfs/create_secret_functions.cpp - extension/httpfs/httpfs_extension.cpp) + extension/httpfs/httpfs_extension.cpp + ${EXTRA_SOURCES} ) if(MINGW) set(OPENSSL_USE_STATIC_LIBS TRUE) endif() find_package(OpenSSL REQUIRED) +find_package(CURL REQUIRED) include_directories(${OPENSSL_INCLUDE_DIR}) +include_directories(${CURL_INCLUDE_DIRS}) if(EMSCRIPTEN) else() @@ -46,6 +57,11 @@ else() ${OPENSSL_LIBRARIES}) target_link_libraries(httpfs_extension duckdb_mbedtls ${OPENSSL_LIBRARIES}) + # Link dependencies into extension + target_link_libraries(httpfs_loadable_extension ${CURL_LIBRARIES}) + target_link_libraries(httpfs_extension ${CURL_LIBRARIES}) + + if(MINGW) find_package(ZLIB) target_link_libraries(httpfs_loadable_extension ZLIB::ZLIB -lcrypt32) @@ -53,6 +69,7 @@ else() endif() endif() + install( TARGETS httpfs_extension EXPORT "${DUCKDB_EXPORT_SET}" diff --git a/extension/httpfs/crypto.cpp b/extension/httpfs/crypto.cpp index 04bd795..1752ac6 100644 --- a/extension/httpfs/crypto.cpp +++ b/extension/httpfs/crypto.cpp @@ -1,35 +1,23 @@ #include "crypto.hpp" +#include "hash_functions.hpp" #include "mbedtls_wrapper.hpp" #include #include "duckdb/common/common.hpp" #include #define CPPHTTPLIB_OPENSSL_SUPPORT -#include "httplib.hpp" -namespace duckdb { +#include +#include +#include +#include +#include -void sha256(const char *in, size_t in_len, hash_bytes &out) { - 
duckdb_mbedtls::MbedTlsWrapper::ComputeSha256Hash(in, in_len, (char *)out); -} +#if defined(_WIN32) && defined(OPENSSL_USE_APPLINK) +#include +#endif -void hmac256(const std::string &message, const char *secret, size_t secret_len, hash_bytes &out) { - duckdb_mbedtls::MbedTlsWrapper::Hmac256(secret, secret_len, message.data(), message.size(), (char *)out); -} - -void hmac256(std::string message, hash_bytes secret, hash_bytes &out) { - hmac256(message, (char *)secret, sizeof(hash_bytes), out); -} - -void hex256(hash_bytes &in, hash_str &out) { - const char *hex = "0123456789abcdef"; - unsigned char *pin = in; - unsigned char *pout = out; - for (; pin < in + sizeof(in); pout += 2, pin++) { - pout[0] = hex[(*pin >> 4) & 0xF]; - pout[1] = hex[*pin & 0xF]; - } -} +namespace duckdb { AESStateSSL::AESStateSSL(const std::string *key) : context(EVP_CIPHER_CTX_new()) { if (!(context)) { diff --git a/extension/httpfs/hash_functions.cpp b/extension/httpfs/hash_functions.cpp new file mode 100644 index 0000000..1e6bb8f --- /dev/null +++ b/extension/httpfs/hash_functions.cpp @@ -0,0 +1,28 @@ +#include "mbedtls_wrapper.hpp" +#include "hash_functions.hpp" + +namespace duckdb { + +void sha256(const char *in, size_t in_len, hash_bytes &out) { + duckdb_mbedtls::MbedTlsWrapper::ComputeSha256Hash(in, in_len, (char *)out); +} + +void hmac256(const std::string &message, const char *secret, size_t secret_len, hash_bytes &out) { + duckdb_mbedtls::MbedTlsWrapper::Hmac256(secret, secret_len, message.data(), message.size(), (char *)out); +} + +void hmac256(std::string message, hash_bytes secret, hash_bytes &out) { + hmac256(message, (char *)secret, sizeof(hash_bytes), out); +} + +void hex256(hash_bytes &in, hash_str &out) { + const char *hex = "0123456789abcdef"; + unsigned char *pin = in; + unsigned char *pout = out; + for (; pin < in + sizeof(in); pout += 2, pin++) { + pout[0] = hex[(*pin >> 4) & 0xF]; + pout[1] = hex[*pin & 0xF]; + } +} + +} // namespace duckdb diff --git 
a/extension/httpfs/httpfs.cpp b/extension/httpfs/httpfs.cpp index 56c8b8e..20f85a6 100644 --- a/extension/httpfs/httpfs.cpp +++ b/extension/httpfs/httpfs.cpp @@ -727,4 +727,9 @@ void HTTPFileHandle::StoreClient(unique_ptr client) { HTTPFileHandle::~HTTPFileHandle() { DUCKDB_LOG_FILE_SYSTEM_CLOSE((*this)); }; + +string HTTPFSUtil::GetName() const { + return "HTTPFS"; +} + } // namespace duckdb diff --git a/extension/httpfs/httpfs_client_wasm.cpp b/extension/httpfs/httpfs_client_wasm.cpp new file mode 100644 index 0000000..aaa22bb --- /dev/null +++ b/extension/httpfs/httpfs_client_wasm.cpp @@ -0,0 +1,16 @@ +#include "httpfs_client.hpp" +#include "http_state.hpp" + +namespace duckdb { + +unique_ptr HTTPFSUtil::InitializeClient(HTTPParams &http_params, const string &proto_host_port) { + throw InternalException("HTTPFSUtil::InitializeClient is not expected to be called"); +} + +unordered_map HTTPFSUtil::ParseGetParameters(const string &text) { + unordered_map result; + //TODO: HTTPFSUtil::ParseGetParameters is currently not implemented + return result; +} + +} // namespace duckdb diff --git a/extension/httpfs/httpfs_curl_client.cpp b/extension/httpfs/httpfs_curl_client.cpp new file mode 100644 index 0000000..d648269 --- /dev/null +++ b/extension/httpfs/httpfs_curl_client.cpp @@ -0,0 +1,489 @@ +#include "httpfs_client.hpp" +#include "http_state.hpp" + +#define CPPHTTPLIB_OPENSSL_SUPPORT + +#include +#include +#include "duckdb/common/exception/http_exception.hpp" + +namespace duckdb { + +// we statically compile in libcurl, which means the cert file location of the build machine is the +// place curl will look. But not every distro has this file in the same location, so we search a +// number of common locations and use the first one we find. 
+static std::string certFileLocations[] = { + // Arch, Debian-based, Gentoo + "/etc/ssl/certs/ca-certificates.crt", + // RedHat 7 based + "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", + // Redhat 6 based + "/etc/pki/tls/certs/ca-bundle.crt", + // OpenSUSE + "/etc/ssl/ca-bundle.pem", + // Alpine + "/etc/ssl/cert.pem"}; + + +//! Grab the first path that exists, from a list of well-known locations +static std::string SelectCURLCertPath() { + for (std::string &caFile : certFileLocations) { + struct stat buf; + if (stat(caFile.c_str(), &buf) == 0) { + return caFile; + } + } + return std::string(); +} + +static std::string cert_path = SelectCURLCertPath(); + +static size_t RequestWriteCallback(void *contents, size_t size, size_t nmemb, void *userp) { + size_t totalSize = size * nmemb; + std::string* str = static_cast(userp); + str->append(static_cast(contents), totalSize); + return totalSize; +} + +static size_t RequestHeaderCallback(void *contents, size_t size, size_t nmemb, void *userp) { + size_t totalSize = size * nmemb; + std::string header(static_cast(contents), totalSize); + HeaderCollector* header_collection = static_cast(userp); + + // Trim trailing \r\n + if (!header.empty() && header.back() == '\n') { + header.pop_back(); + if (!header.empty() && header.back() == '\r') { + header.pop_back(); + } + } + + // If header starts with HTTP/... curl has followed a redirect and we have a new Header, + // so we push back a new header_collection and store headers from the redirect there. 
+ if (header.rfind("HTTP/", 0) == 0) { + header_collection->header_collection.push_back(HTTPHeaders()); + header_collection->header_collection.back().Insert("__RESPONSE_STATUS__", header); + } + + size_t colonPos = header.find(':'); + + if (colonPos != std::string::npos) { + // Split the string into two parts + std::string part1 = header.substr(0, colonPos); + std::string part2 = header.substr(colonPos + 1); + if (part2.at(0) == ' ') { + part2.erase(0, 1); + } + + header_collection->header_collection.back().Insert(part1, part2); + } + // TODO: some headers may not follow standard response header formats. + // what to do in this case? Invalid does not mean we should abort. + + return totalSize; +} + + CURLHandle::CURLHandle(const string &token, const string &cert_path) { + curl = curl_easy_init(); + if (!curl) { + throw InternalException("Failed to initialize curl"); + } + if (!token.empty()) { + curl_easy_setopt(curl, CURLOPT_XOAUTH2_BEARER, token.c_str()); + curl_easy_setopt(curl, CURLOPT_HTTPAUTH, CURLAUTH_BEARER); + } + if (!cert_path.empty()) { + curl_easy_setopt(curl, CURLOPT_CAINFO, cert_path.c_str()); + } +} + +CURLHandle::~CURLHandle() { + curl_easy_cleanup(curl); +} + + +struct RequestInfo { + string url = ""; + string body = ""; + // CURLINFO_RESPONSE_CODE writes a long; a narrower type here would be an out-of-bounds write + long response_code = 0; + std::vector header_collection; +}; + + +static idx_t httpfs_client_count = 0; + +class HTTPFSCurlClient : public HTTPClient { +public: + HTTPFSCurlClient(HTTPFSParams &http_params, const string &proto_host_port) { + auto bearer_token = ""; + if (!http_params.bearer_token.empty()) { + bearer_token = http_params.bearer_token.c_str(); + } + state = http_params.state; + + // call curl_global_init if not already done by another HTTPFS Client + InitCurlGlobal(); + + curl = make_uniq(bearer_token, SelectCURLCertPath()); + request_info = make_uniq(); + + // set curl options + // follow redirects + curl_easy_setopt(*curl, CURLOPT_FOLLOWLOCATION, 1L); + + // Curl re-uses connections by default + if
(!http_params.keep_alive) { + curl_easy_setopt(*curl, CURLOPT_FORBID_REUSE, 1L); + } + + // client->enable_server_certificate_verification(http_params.enable_server_cert_verification); + if (http_params.enable_server_cert_verification) { + curl_easy_setopt(*curl, CURLOPT_SSL_VERIFYPEER, 1L); // Verify the cert + curl_easy_setopt(*curl, CURLOPT_SSL_VERIFYHOST, 2L); // Verify that the cert matches the hostname + } + + // TODO: no global write timeout option, but you could customize a timeout in the write functions + // or use CURLOPT_XFERINFOFUNCTION (progress callback) with CURLOPT_TIMEOUT_MS + // we could also set CURLOPT_LOW_SPEED_LIMIT and timeout if the speed is too low for + // too long. + + // set read timeout + curl_easy_setopt(*curl, CURLOPT_TIMEOUT, http_params.timeout); + // set connection timeout + curl_easy_setopt(*curl, CURLOPT_CONNECTTIMEOUT, http_params.timeout); + // accept content as-is (i.e no decompressing) + curl_easy_setopt(*curl, CURLOPT_ACCEPT_ENCODING, "identity"); + // follow redirects + curl_easy_setopt(*curl, CURLOPT_FOLLOWLOCATION, 1L); + + // define the header callback + curl_easy_setopt(*curl, CURLOPT_HEADERFUNCTION, RequestHeaderCallback); + curl_easy_setopt(*curl, CURLOPT_HEADERDATA, &request_info->header_collection); + // define the write data callback (for get requests) + curl_easy_setopt(*curl, CURLOPT_WRITEFUNCTION, RequestWriteCallback); + curl_easy_setopt(*curl, CURLOPT_WRITEDATA, &request_info->body); + + if (!http_params.http_proxy.empty()) { + curl_easy_setopt(*curl, CURLOPT_PROXY, StringUtil::Format("%s:%s", http_params.http_proxy, http_params.http_proxy_port).c_str()); + + if (!http_params.http_proxy_username.empty()) { + curl_easy_setopt(*curl, CURLOPT_PROXYUSERNAME, http_params.http_proxy_username.c_str()); + curl_easy_setopt(*curl, CURLOPT_PROXYPASSWORD, http_params.http_proxy_password.c_str()); + } + } + } + + ~HTTPFSCurlClient() { + DestroyCurlGlobal(); + } + + unique_ptr Get(GetRequestInfo &info) override {
+ if (state) { + state->get_count++; + } + + auto curl_headers = TransformHeadersCurl(info.headers); + request_info->url = info.url; + if (!info.params.extra_headers.empty()) { + auto curl_params = TransformParamsCurl(info.params); + request_info->url += "?" + curl_params; + } + + CURLcode res; + { + // If the same handle served a HEAD request, we must set NOBODY back to 0L to request content again + curl_easy_setopt(*curl, CURLOPT_NOBODY, 0L); + curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str()); + curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr); + res = curl->Execute(); + } + + curl_easy_getinfo(*curl, CURLINFO_RESPONSE_CODE, &request_info->response_code); + + idx_t bytes_received = 0; + if (!request_info->header_collection.empty() && request_info->header_collection.back().HasHeader("content-length")) { + bytes_received = std::stoi(request_info->header_collection.back().GetHeaderValue("content-length")); + D_ASSERT(bytes_received == request_info->body.size()); + } else { + bytes_received = request_info->body.size(); + } + if (state) { + state->total_bytes_received += bytes_received; + } + + const char* data = request_info->body.c_str(); + info.content_handler(const_data_ptr_cast(data), bytes_received); + return TransformResponseCurl(res); + } + + unique_ptr Put(PutRequestInfo &info) override { + if (state) { + state->put_count++; + state->total_bytes_sent += info.buffer_in_len; + } + + auto curl_headers = TransformHeadersCurl(info.headers); + // Add content type header from info + curl_headers.Add("Content-Type: " + info.content_type); + // transform parameters + request_info->url = info.url; + if (!info.params.extra_headers.empty()) { + auto curl_params = TransformParamsCurl(info.params); + request_info->url += "?" 
+ curl_params; + } + + CURLcode res; + { + curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str()); + // Perform PUT + curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, "PUT"); + // Include PUT body + curl_easy_setopt(*curl, CURLOPT_POSTFIELDS, const_char_ptr_cast(info.buffer_in)); + curl_easy_setopt(*curl, CURLOPT_POSTFIELDSIZE, info.buffer_in_len); + + // Apply headers + curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr); + + res = curl->Execute(); + } + + curl_easy_getinfo(*curl, CURLINFO_RESPONSE_CODE, &request_info->response_code); + + return TransformResponseCurl(res); + } + + unique_ptr Head(HeadRequestInfo &info) override { + if (state) { + state->head_count++; + } + + auto curl_headers = TransformHeadersCurl(info.headers); + request_info->url = info.url; + // transform parameters + if (!info.params.extra_headers.empty()) { + auto curl_params = TransformParamsCurl(info.params); + request_info->url += "?" + curl_params; + } + + CURLcode res; + { + // Set URL + curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str()); + + // Perform HEAD request instead of GET + curl_easy_setopt(*curl, CURLOPT_NOBODY, 1L); + curl_easy_setopt(*curl, CURLOPT_HTTPGET, 0L); + + // Add headers if any + curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr); + + // Execute HEAD request + res = curl->Execute(); + } + + curl_easy_getinfo(*curl, CURLINFO_RESPONSE_CODE, &request_info->response_code); + return TransformResponseCurl(res); + } + + unique_ptr Delete(DeleteRequestInfo &info) override { + if (state) { + state->delete_count++; + } + + auto curl_headers = TransformHeadersCurl(info.headers); + // transform parameters + request_info->url = info.url; + if (!info.params.extra_headers.empty()) { + auto curl_params = TransformParamsCurl(info.params); + request_info->url += "?" 
+ curl_params; + } + + CURLcode res; + { + // Set URL + curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str()); + + // Set DELETE request method + curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, "DELETE"); + + // Follow redirects + curl_easy_setopt(*curl, CURLOPT_FOLLOWLOCATION, 1L); + + // Add headers if any + curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr); + + // Execute DELETE request + res = curl->Execute(); + } + + // Get HTTP response status code + curl_easy_getinfo(*curl, CURLINFO_RESPONSE_CODE, &request_info->response_code); + return TransformResponseCurl( res); + } + + unique_ptr Post(PostRequestInfo &info) override { + if (state) { + state->post_count++; + state->total_bytes_sent += info.buffer_in_len; + } + + auto curl_headers = TransformHeadersCurl(info.headers); + const string content_type = "Content-Type: application/octet-stream"; + curl_headers.Add(content_type.c_str()); + // transform parameters + request_info->url = info.url; + if (!info.params.extra_headers.empty()) { + auto curl_params = TransformParamsCurl(info.params); + request_info->url += "?" + curl_params; + } + + CURLcode res; + { + curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str()); + curl_easy_setopt(*curl, CURLOPT_POST, 1L); + + // Set POST body + curl_easy_setopt(*curl, CURLOPT_POSTFIELDS, const_char_ptr_cast(info.buffer_in)); + curl_easy_setopt(*curl, CURLOPT_POSTFIELDSIZE, info.buffer_in_len); + + // Add headers if any + curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? 
curl_headers.headers : nullptr); + + // Execute POST request + res = curl->Execute(); + } + + curl_easy_getinfo(*curl, CURLINFO_RESPONSE_CODE, &request_info->response_code); + info.buffer_out = request_info->body; + // Construct HTTPResponse + return TransformResponseCurl( res); + } + +private: + CURLRequestHeaders TransformHeadersCurl(const HTTPHeaders &header_map) { + std::vector headers; + for (auto &entry : header_map) { + const std::string new_header = entry.first + ": " + entry.second; + headers.push_back(new_header); + } + CURLRequestHeaders curl_headers; + for (auto &header : headers) { + curl_headers.Add(header); + } + return curl_headers; + } + + string TransformParamsCurl(const HTTPParams ¶ms) { + string result = ""; + unordered_map escaped_params; + bool first_param = true; + for (auto &entry : params.extra_headers) { + const string key = entry.first; + const string value = curl_easy_escape(*curl, entry.second.c_str(), 0); + if (!first_param) { + result += "&"; + } + result += key + "=" + value; + first_param = false; + } + return result; + } + + void ResetRequestInfo() { + // clear headers after transform + request_info->header_collection.clear(); + // reset request info. + request_info->body = ""; + request_info->url = ""; + request_info->response_code = 0; + } + + unique_ptr TransformResponseCurl(CURLcode res) { + auto status_code = HTTPStatusCode(request_info->response_code); + auto response = make_uniq(status_code); + if (res != CURLcode::CURLE_OK) { + // TODO: request error can come from HTTPS Status code toString() value. 
+ if (!request_info->header_collection.empty() && request_info->header_collection.back().HasHeader("__RESPONSE_STATUS__")) { + response->request_error = request_info->header_collection.back().GetHeaderValue("__RESPONSE_STATUS__"); + } else { + response->request_error = curl_easy_strerror(res); + } + return response; + } + response->body = request_info->body; + response->url= request_info->url; + if (!request_info->header_collection.empty()) { + for (auto &header : request_info->header_collection.back()) { + response->headers.Insert(header.first, header.second); + } + } + ResetRequestInfo(); + return response; + } + +private: + unique_ptr curl; + optional_ptr state; + unique_ptr request_info; + + static std::mutex &GetRefLock() { + static std::mutex mtx; + return mtx; + } + + static void InitCurlGlobal() { + std::lock_guard<std::mutex> curl_init_lock(GetRefLock()); + if (httpfs_client_count == 0) { + curl_global_init(CURL_GLOBAL_DEFAULT); + } + ++httpfs_client_count; + } + + static void DestroyCurlGlobal() { + // TODO: when to call curl_global_cleanup() + // calling it on client destruction causes SSL errors when verification is on (due to many requests).
+ // GetRefLock(); + // if (httpfs_client_count == 0) { + // throw InternalException("Destroying Httpfs client that did not initialize CURL"); + // } + // --httpfs_client_count; + // if (httpfs_client_count == 0) { + // curl_global_cleanup(); + // } + } +}; + +unique_ptr HTTPFSCurlUtil::InitializeClient(HTTPParams &http_params, const string &proto_host_port) { + auto client = make_uniq(http_params.Cast(), proto_host_port); + return std::move(client); +} + +unordered_map HTTPFSCurlUtil::ParseGetParameters(const string &text) { + unordered_map params; + + auto pos = text.find('?'); + if (pos == std::string::npos) return params; + + std::string query = text.substr(pos + 1); + std::stringstream ss(query); + std::string item; + + while (std::getline(ss, item, '&')) { + auto eq_pos = item.find('='); + if (eq_pos != std::string::npos) { + std::string key = item.substr(0, eq_pos); + std::string value = StringUtil::URLDecode(item.substr(eq_pos + 1)); + params[key] = value; + } else { + params[item] = ""; // key with no value + } + } + + return params; +} + +string HTTPFSCurlUtil::GetName() const { + return "HTTPFS-Curl"; +} + +} // namespace duckdb diff --git a/extension/httpfs/httpfs_extension.cpp b/extension/httpfs/httpfs_extension.cpp index c9bc985..2ec2ce6 100644 --- a/extension/httpfs/httpfs_extension.cpp +++ b/extension/httpfs/httpfs_extension.cpp @@ -2,11 +2,14 @@ #include "httpfs_extension.hpp" +#include "httpfs_client.hpp" #include "create_secret_functions.hpp" #include "duckdb.hpp" #include "s3fs.hpp" #include "hffs.hpp" +#ifdef OVERRIDE_ENCRYPTION_UTILS #include "crypto.hpp" +#endif // OVERRIDE_ENCRYPTION_UTILS namespace duckdb { @@ -61,7 +64,39 @@ static void LoadInternal(DatabaseInstance &instance) { // HuggingFace options config.AddExtensionOption("hf_max_per_page", "Debug option to limit number of items returned in list requests", LogicalType::UBIGINT, Value::UBIGINT(0)); - config.http_util = make_shared_ptr(); + + auto callback_httpfs_client_implementation = 
[](ClientContext &context, SetScope scope, Value &parameter) { + auto &config = DBConfig::GetConfig(context); + string value = StringValue::Get(parameter); + if (config.http_util && config.http_util->GetName() == "WasmHTTPUtils") { + if (value == "wasm" || value == "default") { + return; + } + throw InvalidInputException("Unsupported option for httpfs_client_implementation, only `wasm` and " + "`default` are currently supported for duckdb-wasm"); + } + if (value == "curl") { + if (!config.http_util || config.http_util->GetName() != "HTTPFS-Curl") { + config.http_util = make_shared_ptr(); + } + return; + } else if (value == "httplib" || value == "default") { + if (!config.http_util || config.http_util->GetName() != "HTTPFS") { + config.http_util = make_shared_ptr(); + } + return; + } + throw InvalidInputException("Unsupported option for httpfs_client_implementation, only `curl`, `httplib` and " + "`default` are currently supported"); + }; + config.AddExtensionOption("httpfs_client_implementation", "Select which is the HTTPUtil implementation to be used", + LogicalType::VARCHAR, "default", callback_httpfs_client_implementation); + + if (config.http_util && config.http_util->GetName() == "WasmHTTPUtils") { + // Already handled, do not override + } else { + config.http_util = make_shared_ptr(); + } auto provider = make_uniq(config); provider->SetAll(); @@ -69,8 +104,10 @@ static void LoadInternal(DatabaseInstance &instance) { CreateS3SecretFunctions::Register(instance); CreateBearerTokenFunctions::Register(instance); +#ifdef OVERRIDE_ENCRYPTION_UTILS // set pointer to OpenSSL encryption state config.encryption_util = make_shared_ptr(); +#endif // OVERRIDE_ENCRYPTION_UTILS } void HttpfsExtension::Load(DuckDB &db) { LoadInternal(*db.instance); diff --git a/extension/httpfs/httpfs_client.cpp b/extension/httpfs/httpfs_httplib_client.cpp similarity index 98% rename from extension/httpfs/httpfs_client.cpp rename to extension/httpfs/httpfs_httplib_client.cpp index
84eb457..3bf5a64 100644 --- a/extension/httpfs/httpfs_client.cpp +++ b/extension/httpfs/httpfs_httplib_client.cpp @@ -160,8 +160,4 @@ unordered_map HTTPFSUtil::ParseGetParameters(const string &text) return result; } -string HTTPFSUtil::GetName() const { - return "HTTPFS"; -} - } // namespace duckdb diff --git a/extension/httpfs/include/hash_functions.hpp b/extension/httpfs/include/hash_functions.hpp new file mode 100644 index 0000000..bfefe79 --- /dev/null +++ b/extension/httpfs/include/hash_functions.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include "duckdb/common/helper.hpp" + +namespace duckdb { + +typedef unsigned char hash_bytes[32]; +typedef unsigned char hash_str[64]; + +void sha256(const char *in, size_t in_len, hash_bytes &out); + +void hmac256(const std::string &message, const char *secret, size_t secret_len, hash_bytes &out); + +void hmac256(std::string message, hash_bytes secret, hash_bytes &out); + +void hex256(hash_bytes &in, hash_str &out); + +} // namespace duckdb diff --git a/extension/httpfs/include/httpfs_client.hpp b/extension/httpfs/include/httpfs_client.hpp index 1d7620c..d540ce8 100644 --- a/extension/httpfs/include/httpfs_client.hpp +++ b/extension/httpfs/include/httpfs_client.hpp @@ -1,6 +1,7 @@ #pragma once #include "duckdb/common/http_util.hpp" +#include namespace duckdb { class HTTPLogger; @@ -36,4 +37,63 @@ class HTTPFSUtil : public HTTPUtil { string GetName() const override; }; +class HTTPFSCurlUtil : public HTTPFSUtil { +public: + unique_ptr InitializeClient(HTTPParams &http_params, const string &proto_host_port) override; + + static unordered_map ParseGetParameters(const string &text); + + string GetName() const override; +}; + +class CURLHandle { +public: + CURLHandle(const string &token, const string &cert_path); + ~CURLHandle(); + +public: + operator CURL *() { + return curl; + } + CURLcode Execute() { + return curl_easy_perform(curl); + } + +private: + CURL *curl = NULL; +}; + +class CURLRequestHeaders { +public: + 
CURLRequestHeaders(vector &input) { + for (auto &header : input) { + Add(header); + } + } + CURLRequestHeaders() {} + + ~CURLRequestHeaders() { + if (headers) { + curl_slist_free_all(headers); + } + headers = NULL; + } + operator bool() const { + return headers != NULL; + } + +public: + void Add(const string &header) { + headers = curl_slist_append(headers, header.c_str()); + } + +public: + curl_slist *headers = NULL; +}; + +struct HeaderCollector { + std::vector header_collection; +}; + + } // namespace duckdb diff --git a/extension_config.cmake b/extension_config.cmake index 5881043..2664bc2 100644 --- a/extension_config.cmake +++ b/extension_config.cmake @@ -15,4 +15,5 @@ duckdb_extension_load(httpfs SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/extension/httpfs/include ${LOAD_HTTPFS_TESTS} + LINKED_LIBS "../../third_party/mbedtls/libduckdb_mbedtls.a" ) diff --git a/test/sql/copy/csv/test_csv_httpfs.test b/test/sql/copy/csv/test_csv_httpfs.test index 869a0b6..d62683e 100644 --- a/test/sql/copy/csv/test_csv_httpfs.test +++ b/test/sql/copy/csv/test_csv_httpfs.test @@ -4,99 +4,43 @@ require httpfs +require parquet + statement ok PRAGMA enable_verification +statement ok +pragma enable_logging('HTTP'); -#FIXME this test fails: file is nonexistent -mode skip - -query IIIIII rowsort -SELECT * from read_csv_auto('https://www.data.gouv.fr/fr/datasets/r/6d186965-f41b-41f3-9b23-88241cc6890c'); ----- -2020 Allemagne Germany 26.1 53196.069 200601.2 -2020 Autriche Austria 18.0 4723.5 26215.8 -2020 Belgique Belgium 28.999999999999996 9436.1 32553.0 -2020 Bulgarie Bulgaria 11.600000000000001 1124.1 9698.7 -2020 Chypre Cyprus 0.0 0.0 1627.6 -2020 Croatie Croatia 16.3 1094.8 6726.3 -2020 Danemark Denmark 11.600000000000001 1579.0 13601.4 -2020 Espagne Spain 17.4 14211.7 81512.9 -2020 Estonie Estonia 8.5 241.1 2827.3 -2020 Finlande Finland 2.8000000000000003 692.3 24674.4 -2020 France France 20.3 28278.9 139375.8 -2020 Grèce Greece 5.800000000000001 
896.5 15401.9 -2020 Hongrie Hungary 30.5 5486.7 17872.4 -2020 Irlande Ireland 17.4 1968.477 11296.601 -2020 Italie Italy 29.2 33042.585 113119.475 -2020 Lettonie Latvia 8.200000000000001 323.605 3926.131 -2020 Lituanie Lithuania 10.7 584.104 5457.728 -2020 Luxembourg Luxembourg 16.5 623.165 3786.785 -2020 Malte Malta 0.0 0.0 547.5 -2020 Pays-Bas Netherlands 37.1 16588.314 44682.656 -2020 Pologne Poland 13.5 9323.205 69135.018 -2020 Portugal Portugal 11.1 1814.878 16354.725 -2020 Roumanie Romania 23.7 5626.161 23712.653 -2020 Royaume-Uni United Kingdom 32.4 39311.416 121414.483 -2020 République tchèque Czech Republic 21.4 5187.282 24263.896 -2020 Slovaquie Slovakia 25.0 2564.876 10248.401 -2020 Slovénie Slovenia 12.1 590.243 4861.315 -2020 Suède Sweden 1.5 475.195 31311.413 -2020 UE 28 Europe 28 22.5 238152.4 1056907.5 -2021 Allemagne Germany 26.760345686044435 51812.567 193616.957 -2021 Autriche Austria 18.720006775926056 4645.795 24817.272 -2021 Belgique Belgium 29.279402721103864 9088.083 31039.168 -2021 Bulgarie Bulgaria 12.368015142641884 1176.537 9512.739 -2021 Chypre Cyprus 0.0 0.0 1528.558 -2021 Croatie Croatia 17.10389029082304 1100.12 6431.987 -2021 Danemark Denmark 11.485631727184947 1508.152 13130.771 -2021 Espagne Spain 19.10173955663722 13815.0 72323.256 -2021 Estonie Estonia 8.988278645659518 245.094 2726.818 -2021 Finlande Finland 2.9937725178230212 694.288 23191.074 -2021 France France 20.649030024470434 26465.646 128168.955 -2021 Grèce Greece 7.580480506088059 1097.87 14482.855 -2021 Hongrie Hungary 32.344729318831554 5693.164 17601.52 -2021 Irlande Ireland 18.020604987495144 1953.468 10840.191 -2021 Italie Italy 30.86368769746751 31807.236 103057.147 -2021 Lettonie Latvia 8.502139837843602 322.927 3798.185 -2021 Lituanie Lithuania 11.029023816606903 582.797 5284.212 -2021 Luxembourg Luxembourg 17.282784281000467 564.365 3265.475 -2021 Malte Malta 0.0 0.0 499.875 -2021 Pays-Bas Netherlands 37.61392206122467 15896.316 42261.788 -2021 Pologne Poland 
13.146720200313602 9235.656 70250.647 -2021 Portugal Portugal 11.437926753365227 1740.3 15215.17 -2021 Roumanie Romania 24.909638477223016 5846.885 23472.38 -2021 République tchèque Czech Republic 21.716683280446812 5158.445 23753.374 -2021 Slovaquie Slovakia 25.253930010417324 2427.134 9610.916 -2021 Slovénie Slovenia 13.141683407321874 582.024 4428.839 -2021 Suède Sweden 1.497679952802663 471.085 31454.317 -2021 UE 27 UE 27 21.894190365821018 193930.95399999994 885764.4460000001 - -query IIIIII rowsort res -SELECT * from read_csv('https://www.data.gouv.fr/fr/datasets/r/6d186965-f41b-41f3-9b23-88241cc6890c',DELIM=';',Columns={'annee_de_reference':'VARCHAR','pays':'VARCHAR','label_en':'VARCHAR','part_du_gaz_naturel_dans_la_consommation_finale_d_energie0':'VARCHAR','consommation_finale_de_gaz_naturel_mtep':'VARCHAR','consommation_finale_d_energie_totale_mtep':'VARCHAR'}); - - -query IIIIII rowsort res -SELECT * from read_csv('https://www.data.gouv.fr/fr/datasets/r/6d186965-f41b-41f3-9b23-88241cc6890c',DELIM=';',Columns={'annee_de_reference':'VARCHAR','pays':'VARCHAR','label_en':'VARCHAR','part_du_gaz_naturel_dans_la_consommation_finale_d_energie0':'VARCHAR','consommation_finale_de_gaz_naturel_mtep':'VARCHAR','consommation_finale_d_energie_totale_mtep':'VARCHAR'}); - - -# Give it a try to a request that returns length 0 -query I -SELECT count(*) from read_csv_auto('https://query1.finance.yahoo.com/v7/finance/download/^GSPC?period1=1512086400&period2=1670630400&interval=1d&events=history') ----- -1265 - -# Give it a try to a request that returns length 0 -query I -SELECT count(*) from read_csv_auto('https://query1.finance.yahoo.com/v7/finance/download/^GSPC?period1=1512086400&period2=1670630400&interval=1d&events=history') +query II +select * from 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/job_role_type.parquet' order by all; ---- -1265 +1 actor +2 actress +3 producer +4 writer +5 cinematographer +6 composer +7 costume designer +8 director +9 editor 
+10 miscellaneous crew +11 production designer +12 guest -# Give it a try to a request that returns length 0 -query I -SELECT count(*) from read_csv_auto('https://query1.finance.yahoo.com/v7/finance/download/^GSPC?period1=1512086400&period2=1670630400&interval=1d&events=history') +query IIIIIIIIIIIIIIIIII +select * from 'https://github.com/duckdb/duckdb/raw/9cf66f950dde0173e1a863a7659b3ecf11bf3978/data/csv/customer.csv'; ---- -1265 +1 AAAAAAAABAAAAAAA 980124 7135 32946 2452238 2452208 Mr. Javier Lewis Y 9 12 1936 CHILE NULL Javier.Lewis@VFAxlnZEvOx.org 2452508 +2 AAAAAAAACAAAAAAA 819667 1461 31655 2452318 2452288 Dr. Amy Moses Y 9 4 1966 TOGO NULL Amy.Moses@Ovk9KjHH.com 2452318 +3 AAAAAAAADAAAAAAA 1473522 6247 48572 2449130 2449100 Miss Latisha Hamilton Y 18 9 1979 NIUE NULL Latisha.Hamilton@V.com 2452313 +4 AAAAAAAAEAAAAAAA 1703214 3986 39558 2450030 2450000 Dr. Michael White Y 7 6 1983 MEXICO NULL Michael.White@i.org 2452361 +5 AAAAAAAAFAAAAAAA 953372 4470 36368 2449438 2449408 Sir Robert Moran N 8 5 1956 FIJI NULL Robert.Moran@Hh.edu 2452469 +6 AAAAAAAAGAAAAAAA 213219 6374 27082 2451883 2451853 Ms. Brunilda Sharp Y 4 12 1925 SURINAME NULL Brunilda.Sharp@T3pylZEUQjm.org 2452430 +7 AAAAAAAAHAAAAAAA 68377 3219 44814 2451438 2451408 Ms. Fonda Wiles N 24 4 1985 GAMBIA NULL Fonda.Wiles@S9KnyEtz9hv.org 2452360 +8 AAAAAAAAIAAAAAAA 1215897 2471 16598 2449406 2449376 Sir Ollie Shipman N 26 12 1938 KOREA, REPUBLIC OF NULL Ollie.Shipman@be.org 2452334 +9 AAAAAAAAJAAAAAAA 1168667 1404 49388 2452275 2452245 Sir Karl Gilbert N 26 10 1966 MONTSERRAT NULL Karl.Gilbert@Crg5KyP2IxX9C4d6.edu 2452454 +10 AAAAAAAAKAAAAAAA 1207553 5143 19580 2451353 2451323 Ms. 
Albert Brunson N 15 10 1973 JORDAN NULL Albert.Brunson@62.com 2452641 #Add test for 5924 query IIIIII @@ -353,3 +297,94 @@ select * from read_csv_auto('https://csvbase.com/meripaterson/stock-exchanges'); 249 North America United States of America Members' Exchange NULL 2020-09-24 250 Africa Zimbabwe Victoria Falls Stock Exchange NULL 2020-11-01 251 Asia China Beijing Stock Exchange NULL 2021-12-27 + + +#FIXME this test fails: file is nonexistent +mode skip + +query IIIIII rowsort +SELECT * from read_csv_auto('https://github.com/duckdb/duckdb/raw/9cf66f950dde0173e1a863a7659b3ecf11bf3978/data/csv/customer.csv'); +---- +2020 Allemagne Germany 26.1 53196.069 200601.2 +2020 Autriche Austria 18.0 4723.5 26215.8 +2020 Belgique Belgium 28.999999999999996 9436.1 32553.0 +2020 Bulgarie Bulgaria 11.600000000000001 1124.1 9698.7 +2020 Chypre Cyprus 0.0 0.0 1627.6 +2020 Croatie Croatia 16.3 1094.8 6726.3 +2020 Danemark Denmark 11.600000000000001 1579.0 13601.4 +2020 Espagne Spain 17.4 14211.7 81512.9 +2020 Estonie Estonia 8.5 241.1 2827.3 +2020 Finlande Finland 2.8000000000000003 692.3 24674.4 +2020 France France 20.3 28278.9 139375.8 +2020 Grèce Greece 5.800000000000001 896.5 15401.9 +2020 Hongrie Hungary 30.5 5486.7 17872.4 +2020 Irlande Ireland 17.4 1968.477 11296.601 +2020 Italie Italy 29.2 33042.585 113119.475 +2020 Lettonie Latvia 8.200000000000001 323.605 3926.131 +2020 Lituanie Lithuania 10.7 584.104 5457.728 +2020 Luxembourg Luxembourg 16.5 623.165 3786.785 +2020 Malte Malta 0.0 0.0 547.5 +2020 Pays-Bas Netherlands 37.1 16588.314 44682.656 +2020 Pologne Poland 13.5 9323.205 69135.018 +2020 Portugal Portugal 11.1 1814.878 16354.725 +2020 Roumanie Romania 23.7 5626.161 23712.653 +2020 Royaume-Uni United Kingdom 32.4 39311.416 121414.483 +2020 République tchèque Czech Republic 21.4 5187.282 24263.896 +2020 Slovaquie Slovakia 25.0 2564.876 10248.401 +2020 Slovénie Slovenia 12.1 590.243 4861.315 +2020 Suède Sweden 1.5 475.195 31311.413 +2020 UE 28 Europe 28 22.5 238152.4 
1056907.5 +2021 Allemagne Germany 26.760345686044435 51812.567 193616.957 +2021 Autriche Austria 18.720006775926056 4645.795 24817.272 +2021 Belgique Belgium 29.279402721103864 9088.083 31039.168 +2021 Bulgarie Bulgaria 12.368015142641884 1176.537 9512.739 +2021 Chypre Cyprus 0.0 0.0 1528.558 +2021 Croatie Croatia 17.10389029082304 1100.12 6431.987 +2021 Danemark Denmark 11.485631727184947 1508.152 13130.771 +2021 Espagne Spain 19.10173955663722 13815.0 72323.256 +2021 Estonie Estonia 8.988278645659518 245.094 2726.818 +2021 Finlande Finland 2.9937725178230212 694.288 23191.074 +2021 France France 20.649030024470434 26465.646 128168.955 +2021 Grèce Greece 7.580480506088059 1097.87 14482.855 +2021 Hongrie Hungary 32.344729318831554 5693.164 17601.52 +2021 Irlande Ireland 18.020604987495144 1953.468 10840.191 +2021 Italie Italy 30.86368769746751 31807.236 103057.147 +2021 Lettonie Latvia 8.502139837843602 322.927 3798.185 +2021 Lituanie Lithuania 11.029023816606903 582.797 5284.212 +2021 Luxembourg Luxembourg 17.282784281000467 564.365 3265.475 +2021 Malte Malta 0.0 0.0 499.875 +2021 Pays-Bas Netherlands 37.61392206122467 15896.316 42261.788 +2021 Pologne Poland 13.146720200313602 9235.656 70250.647 +2021 Portugal Portugal 11.437926753365227 1740.3 15215.17 +2021 Roumanie Romania 24.909638477223016 5846.885 23472.38 +2021 République tchèque Czech Republic 21.716683280446812 5158.445 23753.374 +2021 Slovaquie Slovakia 25.253930010417324 2427.134 9610.916 +2021 Slovénie Slovenia 13.141683407321874 582.024 4428.839 +2021 Suède Sweden 1.497679952802663 471.085 31454.317 +2021 UE 27 UE 27 21.894190365821018 193930.95399999994 885764.4460000001 + +query IIIIII rowsort res +SELECT * from 
read_csv('https://www.data.gouv.fr/fr/datasets/r/6d186965-f41b-41f3-9b23-88241cc6890c',DELIM=';',Columns={'annee_de_reference':'VARCHAR','pays':'VARCHAR','label_en':'VARCHAR','part_du_gaz_naturel_dans_la_consommation_finale_d_energie0':'VARCHAR','consommation_finale_de_gaz_naturel_mtep':'VARCHAR','consommation_finale_d_energie_totale_mtep':'VARCHAR'}); + + +query IIIIII rowsort res +SELECT * from read_csv('https://www.data.gouv.fr/fr/datasets/r/6d186965-f41b-41f3-9b23-88241cc6890c',DELIM=';',Columns={'annee_de_reference':'VARCHAR','pays':'VARCHAR','label_en':'VARCHAR','part_du_gaz_naturel_dans_la_consommation_finale_d_energie0':'VARCHAR','consommation_finale_de_gaz_naturel_mtep':'VARCHAR','consommation_finale_d_energie_totale_mtep':'VARCHAR'}); + + +# Give it a try to a request that returns length 0 +query I +SELECT count(*) from read_csv_auto('https://query1.finance.yahoo.com/v7/finance/download/^GSPC?period1=1512086400&period2=1670630400&interval=1d&events=history') +---- +1265 + +# Give it a try to a request that returns length 0 +query I +SELECT count(*) from read_csv_auto('https://query1.finance.yahoo.com/v7/finance/download/^GSPC?period1=1512086400&period2=1670630400&interval=1d&events=history') +---- +1265 + +# Give it a try to a request that returns length 0 +query I +SELECT count(*) from read_csv_auto('https://query1.finance.yahoo.com/v7/finance/download/^GSPC?period1=1512086400&period2=1670630400&interval=1d&events=history') +---- +1265 \ No newline at end of file diff --git a/test/sql/metadata_stats.test b/test/sql/metadata_stats.test new file mode 100644 index 0000000..4bc6c07 --- /dev/null +++ b/test/sql/metadata_stats.test @@ -0,0 +1,21 @@ +# name: test/sql/metadata_stats.test +# description: Test getting metadata stats +# group: [] + +require parquet + +require httpfs + +require json + +# Test Force download with server that doesn't want to give us the head +statement ok +FROM read_json('https://api.spring.io/projects/spring-boot/generations') + 
+statement ok +SET force_download=false; + +query II +explain analyze SELECT id, first_name, last_name, email FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') +---- +analyzed_plan :.*GET: 2.* diff --git a/test/sql/secret/secret_refresh.test b/test/sql/secret/secret_refresh.test index 85c8738..696279e 100644 --- a/test/sql/secret/secret_refresh.test +++ b/test/sql/secret/secret_refresh.test @@ -78,10 +78,11 @@ CREATE SECRET s1 ( REFRESH 1 ) +# TODO: add FORBIDDEN back in once enum util for http status codes is merged into httpfs statement error FROM "s3://test-bucket/test-file.parquet" ---- -HTTP 403 +HTTP Error: Unable to connect to URL "s3://test-bucket/test-file.parquet": 403 () query I SELECT message[0:46] FROM duckdb_logs WHERE message like '%Successfully refreshed secret%' @@ -118,10 +119,11 @@ statement ok set s3_access_key_id='bogus' # Without secret this query will fail, but since there are no suitable secrets, no refresh attempt will be made +# TODO: add FORBIDDEN in once enum util for http status codes is merged into httpfs statement error FROM "s3://test-bucket/test-file.parquet" ---- -HTTP 403 +HTTP Error: Unable to connect to URL "s3://test-bucket/test-file.parquet": 403 () # -> log empty query II diff --git a/test/sql/test_headers_parsed.test b/test/sql/test_headers_parsed.test new file mode 100644 index 0000000..317ec82 --- /dev/null +++ b/test/sql/test_headers_parsed.test @@ -0,0 +1,48 @@ +# name: test/sql/copy/csv/test_headers_parsed.test +# description: This test triggers the http prefetch mechanism. 
+# group: [csv] + +require httpfs + +require parquet + +statement ok +SET httpfs_client_implementation='curl'; + +statement ok +pragma enable_logging('HTTP'); + +query II +select * from 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/job_role_type.parquet' order by all; +---- +1 actor +2 actress +3 producer +4 writer +5 cinematographer +6 composer +7 costume designer +8 director +9 editor +10 miscellaneous crew +11 production designer +12 guest + +query I +select response.status from duckdb_logs_parsed('HTTP') order by all; +---- +OK_200 +PartialContent_206 + +# response status is either +# HTTP/2 200 +# HTTP/2 206 +# OR +# HTTP/1.1 200 OK +# HTTP/1.1 206 Partial Content +# depending on OS and CA (I think) +query I +select response.headers['__RESPONSE_STATUS__'] LIKE 'HTTP%20%' from duckdb_logs_parsed('HTTP') order by all; +---- +true +true diff --git a/vcpkg.json b/vcpkg.json index 3ed9a36..809e67b 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,5 +1,6 @@ { "dependencies": [ - "openssl" + "openssl", + "curl" ] }