diff --git a/.github/workflows/IntegrationTests.yml b/.github/workflows/IntegrationTests.yml index edf1e5f..a2e6cc3 100644 --- a/.github/workflows/IntegrationTests.yml +++ b/.github/workflows/IntegrationTests.yml @@ -27,6 +27,11 @@ jobs: PYTHON_HTTP_SERVER_URL: http://localhost:8008 PYTHON_HTTP_SERVER_DIR: /tmp/python_test_server + WEBDAV_TEST_SERVER_AVAILABLE: 1 + WEBDAV_TEST_USERNAME: duckdb_webdav_user + WEBDAV_TEST_PASSWORD: duckdb_webdav_password + WEBDAV_TEST_BASE_URL: webdav://localhost:9100 + steps: - uses: actions/checkout@v4 with: @@ -83,6 +88,11 @@ jobs: - name: Start test server & run tests shell: bash run: | + # Minio S3 test server source ./scripts/run_s3_test_server.sh source ./scripts/set_s3_test_server_variables.sh - make test + + # WebDav test server + ./scripts/run_webdav_test_server.sh + + make test \ No newline at end of file diff --git a/scripts/run_webdav_test_server.sh b/scripts/run_webdav_test_server.sh new file mode 100755 index 0000000..1c616ce --- /dev/null +++ b/scripts/run_webdav_test_server.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Note: DON'T run as root + +docker compose -f scripts/webdav.yml -p duckdb-webdav up -d + +# Get setup container name to monitor logs +container_name=$(docker ps -a --format '{{.Names}}' | grep -m 1 "duckdb-webdav") +echo $container_name + +# Wait for setup completion (up to 360 seconds like Minio) +for i in $(seq 1 360); +do + docker_finish_logs=$(docker logs $container_name 2>/dev/null | grep -m 1 'FINISHED SETTING UP WEBDAV' || echo '') + if [ ! 
-z "${docker_finish_logs}" ]; then + break + fi + sleep 1 +done + +export WEBDAV_TEST_SERVER_AVAILABLE=1 +export WEBDAV_TEST_BASE_URL="webdav://localhost:9100" diff --git a/scripts/set_webdav_test_server_variables.sh b/scripts/set_webdav_test_server_variables.sh new file mode 100755 index 0000000..94427e4 --- /dev/null +++ b/scripts/set_webdav_test_server_variables.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +# Run this script with 'source' or the shorthand: '.': +# i.e: source scripts/set_webdav_test_server_variables.sh + +# Enable the WebDAV tests to run +export WEBDAV_TEST_SERVER_AVAILABLE=1 + +export WEBDAV_TEST_USERNAME=duckdb_webdav_user +export WEBDAV_TEST_PASSWORD=duckdb_webdav_password +export WEBDAV_TEST_BASE_URL=webdav://localhost:9100 diff --git a/scripts/stop_webdav_test_server.sh b/scripts/stop_webdav_test_server.sh new file mode 100755 index 0000000..c3cda1f --- /dev/null +++ b/scripts/stop_webdav_test_server.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +echo "Stopping WebDAV test server..." +docker compose -f scripts/webdav.yml -p duckdb-webdav down + +echo "WebDAV test server stopped." diff --git a/scripts/webdav.yml b/scripts/webdav.yml new file mode 100644 index 0000000..0f24ff7 --- /dev/null +++ b/scripts/webdav.yml @@ -0,0 +1,74 @@ +services: + webdav: + image: bytemark/webdav + hostname: duckdb-webdav-test.local + ports: + - "9100:80" + environment: + - AUTH_TYPE=Basic + - USERNAME=duckdb_webdav_user + - PASSWORD=duckdb_webdav_password + + webdav_setup: + image: alpine:latest + depends_on: + - webdav + links: + - webdav + entrypoint: + - /bin/sh + - -c + - | + apk add --no-cache curl; + + until ( + curl -u duckdb_webdav_user:duckdb_webdav_password -f http://webdav:80/ >/dev/null 2>&1 + ) do + echo '...waiting for WebDAV server...' 
&& sleep 1; + done; + + echo 'WebDAV server is ready, creating test data...'; + + # Create directories using WebDAV MKCOL method + curl -u duckdb_webdav_user:duckdb_webdav_password -X MKCOL http://webdav:80/test-dir/; + curl -u duckdb_webdav_user:duckdb_webdav_password -X MKCOL http://webdav:80/test-dir/upload-dir/; + curl -u duckdb_webdav_user:duckdb_webdav_password -X MKCOL http://webdav:80/test-dir/subdir1/; + curl -u duckdb_webdav_user:duckdb_webdav_password -X MKCOL http://webdav:80/test-dir/subdir2/; + curl -u duckdb_webdav_user:duckdb_webdav_password -X MKCOL http://webdav:80/glob-test/; + curl -u duckdb_webdav_user:duckdb_webdav_password -X MKCOL http://webdav:80/glob-test/year=2023/; + curl -u duckdb_webdav_user:duckdb_webdav_password -X MKCOL http://webdav:80/glob-test/year=2024/; + + # Create temporary directory for test files + mkdir -p /tmp/webdav-test; + + # Create test files + printf 'Hello from WebDAV' > /tmp/webdav-test/hello.txt; + + echo 'id,name,value' > /tmp/webdav-test/test1.csv; + echo '1,Alice,100' >> /tmp/webdav-test/test1.csv; + echo '2,Bob,200' >> /tmp/webdav-test/test1.csv; + + echo 'id,name,value' > /tmp/webdav-test/test2.csv; + echo '3,Charlie,300' >> /tmp/webdav-test/test2.csv; + echo '4,Diana,400' >> /tmp/webdav-test/test2.csv; + + echo 'id,name,value' > /tmp/webdav-test/test3.csv; + echo '5,Eve,500' >> /tmp/webdav-test/test3.csv; + echo '6,Frank,600' >> /tmp/webdav-test/test3.csv; + + echo 'id,year,data' > /tmp/webdav-test/data2023.csv; + echo '1,2023,test2023' >> /tmp/webdav-test/data2023.csv; + + echo 'id,year,data' > /tmp/webdav-test/data2024.csv; + echo '2,2024,test2024' >> /tmp/webdav-test/data2024.csv; + + # Upload test files using WebDAV PUT method + curl -u duckdb_webdav_user:duckdb_webdav_password -X PUT http://webdav:80/hello.txt --data-binary @/tmp/webdav-test/hello.txt; + curl -u duckdb_webdav_user:duckdb_webdav_password -X PUT http://webdav:80/test-dir/test1.csv --data-binary @/tmp/webdav-test/test1.csv; + curl -u 
duckdb_webdav_user:duckdb_webdav_password -X PUT http://webdav:80/test-dir/subdir1/test2.csv --data-binary @/tmp/webdav-test/test2.csv; + curl -u duckdb_webdav_user:duckdb_webdav_password -X PUT http://webdav:80/test-dir/subdir2/test3.csv --data-binary @/tmp/webdav-test/test3.csv; + curl -u duckdb_webdav_user:duckdb_webdav_password -X PUT http://webdav:80/glob-test/year=2023/data.csv --data-binary @/tmp/webdav-test/data2023.csv; + curl -u duckdb_webdav_user:duckdb_webdav_password -X PUT http://webdav:80/glob-test/year=2024/data.csv --data-binary @/tmp/webdav-test/data2024.csv; + + echo 'FINISHED SETTING UP WEBDAV'; + exit 0; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 70dfe09..59315c8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,6 +2,7 @@ set(HTTPFS_SOURCES hffs.cpp s3fs.cpp httpfs.cpp + webdavfs.cpp http_state.cpp crypto.cpp hash_functions.cpp diff --git a/src/create_secret_functions.cpp b/src/create_secret_functions.cpp index b94f7f2..d270ad2 100644 --- a/src/create_secret_functions.cpp +++ b/src/create_secret_functions.cpp @@ -335,4 +335,55 @@ CreateBearerTokenFunctions::CreateHuggingFaceSecretFromCredentialChain(ClientCon auto token = TryReadTokenFile("~/.cache/huggingface/token", "", false); return CreateSecretFunctionInternal(context, input, token); } + +void CreateWebDAVSecretFunctions::Register(ExtensionLoader &loader) { + // WebDAV secret + SecretType secret_type_webdav; + secret_type_webdav.name = WEBDAV_TYPE; + secret_type_webdav.deserializer = KeyValueSecret::Deserialize; + secret_type_webdav.default_provider = "config"; + secret_type_webdav.extension = "httpfs"; + loader.RegisterSecretType(secret_type_webdav); + + // WebDAV config provider + CreateSecretFunction webdav_config_fun = {WEBDAV_TYPE, "config", CreateWebDAVSecretFromConfig}; + webdav_config_fun.named_parameters["username"] = LogicalType::VARCHAR; + webdav_config_fun.named_parameters["password"] = LogicalType::VARCHAR; + 
loader.RegisterFunction(webdav_config_fun); +} + +unique_ptr CreateWebDAVSecretFunctions::CreateSecretFunctionInternal(ClientContext &context, + CreateSecretInput &input) { + // Set scope to user provided scope or the default + auto scope = input.scope; + if (scope.empty()) { + // Default scope includes webdav://, webdavs://, storagebox://, and Hetzner Storage Box URLs + scope.push_back("webdav://"); + scope.push_back("webdavs://"); + scope.push_back("storagebox://"); // Hetzner Storage Box shorthand + scope.push_back("https://"); // For Hetzner Storage Boxes and other HTTPS WebDAV servers + } + auto return_value = make_uniq(scope, input.type, input.provider, input.name); + + //! Set key value map + for (const auto &named_param : input.options) { + auto lower_name = StringUtil::Lower(named_param.first); + if (lower_name == "username") { + return_value->secret_map["username"] = named_param.second.ToString(); + } else if (lower_name == "password") { + return_value->secret_map["password"] = named_param.second.ToString(); + } + } + + //! 
Set redact keys + return_value->redact_keys = {"password"}; + + return std::move(return_value); +} + +unique_ptr CreateWebDAVSecretFunctions::CreateWebDAVSecretFromConfig(ClientContext &context, + CreateSecretInput &input) { + return CreateSecretFunctionInternal(context, input); +} + } // namespace duckdb diff --git a/src/httpfs_curl_client.cpp b/src/httpfs_curl_client.cpp index 6de3c85..6ab006c 100644 --- a/src/httpfs_curl_client.cpp +++ b/src/httpfs_curl_client.cpp @@ -343,7 +343,15 @@ class HTTPFSCurlClient : public HTTPClient { CURLcode res; { curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str()); - curl_easy_setopt(*curl, CURLOPT_POST, 1L); + + // Check if a custom HTTP method is specified in extra_headers + auto method_it = info.params.extra_headers.find("X-DuckDB-HTTP-Method"); + if (method_it != info.params.extra_headers.end()) { + // Use custom HTTP method (e.g., PROPFIND for WebDAV) + curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, method_it->second.c_str()); + } else { + curl_easy_setopt(*curl, CURLOPT_POST, 1L); + } // Set POST body curl_easy_setopt(*curl, CURLOPT_POSTFIELDS, const_char_ptr_cast(info.buffer_in)); diff --git a/src/httpfs_extension.cpp b/src/httpfs_extension.cpp index 79d9923..d3c66e7 100644 --- a/src/httpfs_extension.cpp +++ b/src/httpfs_extension.cpp @@ -5,6 +5,7 @@ #include "duckdb.hpp" #include "s3fs.hpp" #include "hffs.hpp" +#include "webdavfs.hpp" #ifdef OVERRIDE_ENCRYPTION_UTILS #include "crypto.hpp" #endif // OVERRIDE_ENCRYPTION_UTILS @@ -41,6 +42,7 @@ static void LoadInternal(ExtensionLoader &loader) { fs.RegisterSubSystem(make_uniq()); fs.RegisterSubSystem(make_uniq()); fs.RegisterSubSystem(make_uniq(BufferManager::GetBufferManager(instance))); + fs.RegisterSubSystem(make_uniq()); auto &config = DBConfig::GetConfig(instance); @@ -137,6 +139,7 @@ static void LoadInternal(ExtensionLoader &loader) { CreateS3SecretFunctions::Register(loader); CreateBearerTokenFunctions::Register(loader); + 
CreateWebDAVSecretFunctions::Register(loader); #ifdef OVERRIDE_ENCRYPTION_UTILS // set pointer to OpenSSL encryption state diff --git a/src/httpfs_httplib_client.cpp b/src/httpfs_httplib_client.cpp index 239a112..ddea31a 100644 --- a/src/httpfs_httplib_client.cpp +++ b/src/httpfs_httplib_client.cpp @@ -88,7 +88,16 @@ class HTTPFSClient : public HTTPClient { } // We use a custom Request method here, because there is no Post call with a contentreceiver in httplib duckdb_httplib_openssl::Request req; - req.method = "POST"; + + // Check if a custom HTTP method is specified in extra_headers + auto method_it = info.params.extra_headers.find("X-DuckDB-HTTP-Method"); + if (method_it != info.params.extra_headers.end()) { + // Use custom HTTP method (e.g., PROPFIND for WebDAV) + req.method = method_it->second; + } else { + req.method = "POST"; + } + req.path = info.path; req.headers = TransformHeaders(info.headers, info.params); if (req.headers.find("Content-Type") == req.headers.end()) { diff --git a/src/include/create_secret_functions.hpp b/src/include/create_secret_functions.hpp index bd3bc4a..485af1d 100644 --- a/src/include/create_secret_functions.hpp +++ b/src/include/create_secret_functions.hpp @@ -52,4 +52,18 @@ struct CreateBearerTokenFunctions { CreateSecretInput &input); }; +struct CreateWebDAVSecretFunctions { +public: + static constexpr const char *WEBDAV_TYPE = "webdav"; + + //! Register all CreateSecretFunctions + static void Register(ExtensionLoader &loader); + +protected: + //! Internal function to create WebDAV secret + static unique_ptr CreateSecretFunctionInternal(ClientContext &context, CreateSecretInput &input); + //! 
Credential provider function + static unique_ptr CreateWebDAVSecretFromConfig(ClientContext &context, CreateSecretInput &input); +}; + } // namespace duckdb diff --git a/src/include/webdavfs.hpp b/src/include/webdavfs.hpp new file mode 100644 index 0000000..ada9769 --- /dev/null +++ b/src/include/webdavfs.hpp @@ -0,0 +1,105 @@ +#pragma once + +#include "httpfs.hpp" +#include "duckdb/common/file_opener.hpp" +#include "duckdb/common/case_insensitive_map.hpp" + +namespace duckdb { + +struct WebDAVAuthParams { + string username; + string password; + + static WebDAVAuthParams ReadFrom(optional_ptr opener, FileOpenerInfo &info); +}; + +struct ParsedWebDAVUrl { + string http_proto; + string host; + string path; + + string GetHTTPUrl() const; +}; + +class WebDAVFileHandle : public HTTPFileHandle { + friend class WebDAVFileSystem; + +public: + WebDAVFileHandle(FileSystem &fs, const OpenFileInfo &file, FileOpenFlags flags, + unique_ptr http_params_p, const WebDAVAuthParams &auth_params_p) + : HTTPFileHandle(fs, file, flags, std::move(http_params_p)), auth_params(auth_params_p) { + if (flags.OpenForReading() && flags.OpenForWriting()) { + throw NotImplementedException("Cannot open a WebDAV file for both reading and writing"); + } else if (flags.OpenForAppending()) { + throw NotImplementedException("Cannot open a WebDAV file for appending"); + } + } + ~WebDAVFileHandle() override; + + WebDAVAuthParams auth_params; + +public: + void Close() override; + void Initialize(optional_ptr opener) override; + +protected: + unique_ptr CreateClient() override; +}; + +class WebDAVFileSystem : public HTTPFileSystem { +public: + WebDAVFileSystem() = default; + + string GetName() const override; + +public: + // WebDAV-specific methods + duckdb::unique_ptr PropfindRequest(FileHandle &handle, string url, HTTPHeaders header_map, + int depth = 1); + duckdb::unique_ptr MkcolRequest(FileHandle &handle, string url, HTTPHeaders header_map); + duckdb::unique_ptr CustomRequest(FileHandle &handle, 
string url, HTTPHeaders header_map, + const string &method, char *buffer_in, idx_t buffer_in_len); + + // Override standard methods for WebDAV support + duckdb::unique_ptr HeadRequest(FileHandle &handle, string url, HTTPHeaders header_map) override; + duckdb::unique_ptr GetRequest(FileHandle &handle, string url, HTTPHeaders header_map) override; + duckdb::unique_ptr GetRangeRequest(FileHandle &handle, string url, HTTPHeaders header_map, + idx_t file_offset, char *buffer_out, + idx_t buffer_out_len) override; + duckdb::unique_ptr PutRequest(FileHandle &handle, string url, HTTPHeaders header_map, char *buffer_in, + idx_t buffer_in_len, string params = "") override; + duckdb::unique_ptr DeleteRequest(FileHandle &handle, string url, HTTPHeaders header_map) override; + + bool CanHandleFile(const string &fpath) override; + static bool IsWebDAVUrl(const string &url); + void RemoveFile(const string &filename, optional_ptr opener = nullptr) override; + void MoveFile(const string &source, const string &target, optional_ptr opener = nullptr) override; + void CreateDirectory(const string &directory, optional_ptr opener = nullptr) override; + void RemoveDirectory(const string &directory, optional_ptr opener = nullptr) override; + void FileSync(FileHandle &handle) override; + void Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override; + + bool OnDiskFile(FileHandle &handle) override { + return false; + } + + bool DirectoryExists(const string &directory, optional_ptr opener = nullptr) override; + vector Glob(const string &glob_pattern, FileOpener *opener = nullptr) override; + bool ListFiles(const string &directory, const std::function &callback, + FileOpener *opener = nullptr) override; + + static ParsedWebDAVUrl ParseUrl(const string &url); + +protected: + duckdb::unique_ptr CreateHandle(const OpenFileInfo &file, FileOpenFlags flags, + optional_ptr opener) override; + + HTTPException GetHTTPError(FileHandle &, const HTTPResponse &response, const 
string &url) override; + +private: + void AddAuthHeaders(HTTPHeaders &headers, const WebDAVAuthParams &auth_params); + string Base64Encode(const string &input); + string DirectPropfindRequest(const string &url, const WebDAVAuthParams &auth_params, int depth); +}; + +} // namespace duckdb diff --git a/src/webdavfs.cpp b/src/webdavfs.cpp new file mode 100644 index 0000000..5eadaba --- /dev/null +++ b/src/webdavfs.cpp @@ -0,0 +1,719 @@ +#include "webdavfs.hpp" + +#include "crypto.hpp" +#include "duckdb.hpp" +#ifndef DUCKDB_AMALGAMATION +#include "duckdb/common/exception/http_exception.hpp" +#include "duckdb/common/helper.hpp" +#include "duckdb/common/http_util.hpp" +#include "duckdb/logging/log_type.hpp" +#include "duckdb/logging/file_system_logger.hpp" +#include "http_state.hpp" +#endif + +#include "duckdb/common/string_util.hpp" +#include "duckdb/function/scalar/string_common.hpp" +#include "duckdb/main/secret/secret_manager.hpp" +#include "httpfs_client.hpp" + +#include +#include +#include + +namespace duckdb { + +WebDAVFileHandle::~WebDAVFileHandle() = default; + +void WebDAVFileHandle::Close() { +} + +void WebDAVFileHandle::Initialize(optional_ptr opener) { + HTTPFileHandle::Initialize(opener); +} + +unique_ptr WebDAVFileHandle::CreateClient() { + return http_params.http_util.InitializeClient(http_params, path); +} + +WebDAVAuthParams WebDAVAuthParams::ReadFrom(optional_ptr opener, FileOpenerInfo &info) { + WebDAVAuthParams params; + + if (!opener) { + return params; + } + + KeyValueSecretReader secret_reader(*opener, &info, "webdav"); + secret_reader.TryGetSecretKey("username", params.username); + secret_reader.TryGetSecretKey("password", params.password); + + return params; +} + +string ParsedWebDAVUrl::GetHTTPUrl() const { + return http_proto + "://" + host + path; +} + +ParsedWebDAVUrl WebDAVFileSystem::ParseUrl(const string &url) { + ParsedWebDAVUrl result; + + // Check for storagebox:// protocol (Hetzner Storage Box shorthand) + if 
(StringUtil::StartsWith(url, "storagebox://")) { + result.http_proto = "https"; + // Extract username and path from storagebox://u123456/path/to/file + string remainder = url.substr(13); // Skip "storagebox://" + + auto slash_pos = remainder.find('/'); + string username; + if (slash_pos != string::npos) { + username = remainder.substr(0, slash_pos); + result.path = remainder.substr(slash_pos); + } else { + username = remainder; + result.path = "/"; + } + + // Build the Hetzner Storage Box hostname + result.host = username + ".your-storagebox.de"; + return result; + } + + // Check for webdav:// or webdavs:// protocol + if (StringUtil::StartsWith(url, "webdav://")) { + result.http_proto = "http"; + result.host = url.substr(9); + } else if (StringUtil::StartsWith(url, "webdavs://")) { + result.http_proto = "https"; + result.host = url.substr(10); + } else if (StringUtil::StartsWith(url, "https://")) { + result.http_proto = "https"; + result.host = url.substr(8); + } else if (StringUtil::StartsWith(url, "http://")) { + result.http_proto = "http"; + result.host = url.substr(7); + } else { + throw IOException("Invalid WebDAV URL: %s", url); + } + + // Split host and path + auto slash_pos = result.host.find('/'); + if (slash_pos != string::npos) { + result.path = result.host.substr(slash_pos); + result.host = result.host.substr(0, slash_pos); + } else { + result.path = "/"; + } + + return result; +} + +string WebDAVFileSystem::Base64Encode(const string &input) { + const string base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + string result; + int val = 0; + int valb = -6; + + for (unsigned char c : input) { + val = (val << 8) + c; + valb += 8; + while (valb >= 0) { + result.push_back(base64_chars[(val >> valb) & 0x3F]); + valb -= 6; + } + } + + if (valb > -6) { + result.push_back(base64_chars[((val << 8) >> (valb + 8)) & 0x3F]); + } + + while (result.size() % 4) { + result.push_back('='); + } + + return result; +} + +// Custom HTTP 
request using HTTP client infrastructure +duckdb::unique_ptr WebDAVFileSystem::CustomRequest(FileHandle &handle, string url, HTTPHeaders header_map, + const string &method, char *buffer_in, + idx_t buffer_in_len) { + auto &wfh = handle.Cast(); + auto &http_util = wfh.http_params.http_util; + + // Store the method in extra headers as a hint for custom processing + auto &http_params = wfh.http_params; + auto original_extra_headers = http_params.extra_headers; + http_params.extra_headers["X-DuckDB-HTTP-Method"] = method; + + // Create POST request + PostRequestInfo post_request(url, header_map, http_params, const_data_ptr_cast(buffer_in), buffer_in_len); + auto result = http_util.Request(post_request); + + // Copy the response body to the result + if (result) { + result->body = std::move(post_request.buffer_out); + } + + // Restore headers + http_params.extra_headers = original_extra_headers; + + return result; +} + +string WebDAVFileSystem::DirectPropfindRequest(const string &url, const WebDAVAuthParams &auth_params, int depth) { + // We need a file handle to make HTTP requests through the proper infrastructure + // Since we're being called from Glob which has an opener, we should create a temporary handle + // For now, we'll return empty and the caller should handle creating the handle properly + return ""; +} + +void WebDAVFileSystem::AddAuthHeaders(HTTPHeaders &headers, const WebDAVAuthParams &auth_params) { + if (!auth_params.username.empty() || !auth_params.password.empty()) { + string credentials = auth_params.username + ":" + auth_params.password; + string encoded = Base64Encode(credentials); + headers["Authorization"] = "Basic " + encoded; + } +} + +string WebDAVFileSystem::GetName() const { + return "WebDAVFileSystem"; +} + +bool WebDAVFileSystem::IsWebDAVUrl(const string &url) { + // Check for storagebox:// protocol (Hetzner Storage Box shorthand) + if (StringUtil::StartsWith(url, "storagebox://")) { + return true; + } + // Check for explicit WebDAV 
protocol + if (StringUtil::StartsWith(url, "webdav://") || StringUtil::StartsWith(url, "webdavs://")) { + return true; + } + // Check for Hetzner Storage Box URLs (these use WebDAV) + if (url.find(".your-storagebox.de/") != string::npos) { + return true; + } + return false; +} + +bool WebDAVFileSystem::CanHandleFile(const string &fpath) { + return IsWebDAVUrl(fpath); +} + +duckdb::unique_ptr WebDAVFileSystem::CreateHandle(const OpenFileInfo &file, FileOpenFlags flags, + optional_ptr opener) { + D_ASSERT(flags.Compression() == FileCompressionType::UNCOMPRESSED); + + // First, read auth params using ORIGINAL URL for secret matching + // This is critical for proper secret scoping - secrets are scoped to storagebox:// URLs, + // not the converted https:// URLs + FileOpenerInfo info; + info.file_path = file.path; // Use ORIGINAL URL (e.g., storagebox://u507042/file.parquet) + auto auth_params = WebDAVAuthParams::ReadFrom(opener, info); + + // Parse and convert the URL for actual HTTP operations (e.g., storagebox:// -> https://) + auto parsed_url = ParseUrl(file.path); + string converted_url = parsed_url.GetHTTPUrl(); + + // Create a modified file info with the converted URL for HTTP operations + OpenFileInfo converted_file = file; + converted_file.path = converted_url; + + auto params = HTTPFSUtil::GetHTTPUtil(opener)->InitializeParameters(opener, &info); + auto http_params_p = dynamic_cast(params.get()); + if (!http_params_p) { + throw InternalException("Failed to cast HTTP params"); + } + + return make_uniq(*this, converted_file, flags, std::move(params), auth_params); +} + +duckdb::unique_ptr WebDAVFileSystem::PropfindRequest(FileHandle &handle, string url, + HTTPHeaders header_map, int depth) { + auto &wfh = handle.Cast(); + AddAuthHeaders(header_map, wfh.auth_params); + header_map["Depth"] = to_string(depth); + header_map["Content-Type"] = "application/xml; charset=utf-8"; + + // Basic PROPFIND request body + string propfind_body = "" + "" + "" + "" + "" + "" + "" 
+ ""; + + // Use CustomRequest which sets up PROPFIND properly + return CustomRequest(handle, url, header_map, "PROPFIND", const_cast(propfind_body.c_str()), + propfind_body.size()); +} + +duckdb::unique_ptr WebDAVFileSystem::MkcolRequest(FileHandle &handle, string url, + HTTPHeaders header_map) { + auto &wfh = handle.Cast(); + AddAuthHeaders(header_map, wfh.auth_params); + + // Use PUT request with a trailing slash to create directory + // This is a workaround since we don't have a MKCOL request type + return PutRequest(handle, url, header_map, nullptr, 0, ""); +} + +duckdb::unique_ptr WebDAVFileSystem::HeadRequest(FileHandle &handle, string url, HTTPHeaders header_map) { + auto &wfh = handle.Cast(); + AddAuthHeaders(header_map, wfh.auth_params); + return HTTPFileSystem::HeadRequest(handle, url, header_map); +} + +duckdb::unique_ptr WebDAVFileSystem::GetRequest(FileHandle &handle, string url, HTTPHeaders header_map) { + auto &wfh = handle.Cast(); + AddAuthHeaders(header_map, wfh.auth_params); + return HTTPFileSystem::GetRequest(handle, url, header_map); +} + +duckdb::unique_ptr WebDAVFileSystem::GetRangeRequest(FileHandle &handle, string url, + HTTPHeaders header_map, idx_t file_offset, + char *buffer_out, idx_t buffer_out_len) { + auto &wfh = handle.Cast(); + AddAuthHeaders(header_map, wfh.auth_params); + return HTTPFileSystem::GetRangeRequest(handle, url, header_map, file_offset, buffer_out, buffer_out_len); +} + +duckdb::unique_ptr WebDAVFileSystem::PutRequest(FileHandle &handle, string url, HTTPHeaders header_map, + char *buffer_in, idx_t buffer_in_len, string params) { + auto &wfh = handle.Cast(); + AddAuthHeaders(header_map, wfh.auth_params); + return HTTPFileSystem::PutRequest(handle, url, header_map, buffer_in, buffer_in_len, params); +} + +duckdb::unique_ptr WebDAVFileSystem::DeleteRequest(FileHandle &handle, string url, + HTTPHeaders header_map) { + auto &wfh = handle.Cast(); + AddAuthHeaders(header_map, wfh.auth_params); + return 
HTTPFileSystem::DeleteRequest(handle, url, header_map); +} + +void WebDAVFileSystem::RemoveFile(const string &filename, optional_ptr opener) { + auto parsed_url = ParseUrl(filename); + string http_url = parsed_url.GetHTTPUrl(); + + FileOpenerInfo info; + info.file_path = filename; + auto auth_params = WebDAVAuthParams::ReadFrom(opener, info); + + // Create a temporary handle for the delete operation + OpenFileInfo file_info; + file_info.path = filename; + auto handle = CreateHandle(file_info, FileOpenFlags::FILE_FLAGS_READ, opener); + handle->Initialize(opener); + + HTTPHeaders headers; + auto response = DeleteRequest(*handle, http_url, headers); + + if (response->status != HTTPStatusCode::OK_200 && response->status != HTTPStatusCode::NoContent_204 && + response->status != HTTPStatusCode::Accepted_202) { + throw IOException("Failed to delete file %s: HTTP %d", filename, static_cast(response->status)); + } +} + +void WebDAVFileSystem::MoveFile(const string &source, const string &target, optional_ptr opener) { + // WebDAV doesn't support atomic move, so we implement it as copy + delete + // For large files, this could be inefficient, but it works + + // Parse both URLs + auto source_parsed = ParseUrl(source); + auto target_parsed = ParseUrl(target); + string source_http_url = source_parsed.GetHTTPUrl(); + string target_http_url = target_parsed.GetHTTPUrl(); + + // Read the source file + OpenFileInfo source_file; + source_file.path = source; + auto source_handle = CreateHandle(source_file, FileOpenFlags::FILE_FLAGS_READ, opener); + source_handle->Initialize(opener); + + // Read all data from source + auto file_size = source_handle->length; + auto buffer = make_unsafe_uniq_array(file_size); + source_handle->Read(buffer.get(), file_size, 0); + + // Write to target + OpenFileInfo target_file; + target_file.path = target; + FileOpenFlags write_flags; + write_flags = FileOpenFlags::FILE_FLAGS_WRITE; + auto target_handle = CreateHandle(target_file, write_flags, opener); + + 
HTTPHeaders headers; + auto response = PutRequest(*target_handle, target_http_url, headers, buffer.get(), file_size, ""); + + if (response->status != HTTPStatusCode::OK_200 && response->status != HTTPStatusCode::Created_201 && + response->status != HTTPStatusCode::NoContent_204) { + throw IOException("Failed to write target file %s during move: HTTP %d", target, + static_cast(response->status)); + } + + // Delete source file + RemoveFile(source, opener); +} + +void WebDAVFileSystem::CreateDirectory(const string &directory, optional_ptr opener) { + auto parsed_url = ParseUrl(directory); + string http_url = parsed_url.GetHTTPUrl(); + + // Ensure the URL ends with a slash for directory creation + if (!StringUtil::EndsWith(http_url, "/")) { + http_url += "/"; + } + + FileOpenerInfo info; + info.file_path = directory; + auto auth_params = WebDAVAuthParams::ReadFrom(opener, info); + + // Create a temporary handle for the MKCOL operation + OpenFileInfo file_info; + file_info.path = directory; + auto handle = CreateHandle(file_info, FileOpenFlags::FILE_FLAGS_READ, opener); + handle->Initialize(opener); + + HTTPHeaders headers; + auto response = MkcolRequest(*handle, http_url, headers); + + if (response->status != HTTPStatusCode::Created_201 && response->status != HTTPStatusCode::OK_200 && + response->status != HTTPStatusCode::NoContent_204) { + // Directory might already exist + if (response->status != HTTPStatusCode::MethodNotAllowed_405) { + throw IOException("Failed to create directory %s: HTTP %d", directory, static_cast(response->status)); + } + } +} + +void WebDAVFileSystem::RemoveDirectory(const string &directory, optional_ptr opener) { + RemoveFile(directory, opener); +} + +bool WebDAVFileSystem::DirectoryExists(const string &directory, optional_ptr opener) { + auto parsed_url = ParseUrl(directory); + string http_url = parsed_url.GetHTTPUrl(); + + if (!StringUtil::EndsWith(http_url, "/")) { + http_url += "/"; + } + + FileOpenerInfo info; + info.file_path = 
directory; + + // Create a temporary handle for the HEAD operation + OpenFileInfo file_info; + file_info.path = directory; + auto handle = CreateHandle(file_info, FileOpenFlags::FILE_FLAGS_READ, opener); + handle->Initialize(opener); + + HTTPHeaders headers; + auto response = HeadRequest(*handle, http_url, headers); + + return response->status == HTTPStatusCode::OK_200 || response->status == HTTPStatusCode::NoContent_204; +} + +void WebDAVFileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { // NOTE(review): location is not read in this function; each call PUTs this buffer to the file's URL + auto &wfh = handle.Cast(); + auto parsed_url = ParseUrl(wfh.path); + string http_url = parsed_url.GetHTTPUrl(); + + HTTPHeaders headers; + auto response = PutRequest(handle, http_url, headers, static_cast(buffer), nr_bytes, ""); + + if (response->status != HTTPStatusCode::OK_200 && response->status != HTTPStatusCode::Created_201 && + response->status != HTTPStatusCode::NoContent_204) { + throw IOException("Failed to write to file %s: HTTP %d", wfh.path, static_cast(response->status)); + } + + wfh.file_offset += nr_bytes; +} + +void WebDAVFileSystem::FileSync(FileHandle &handle) { + // Intentionally empty: WebDAV PUT is synchronous, so no additional sync needed +} + +// Helper function to parse XML and extract file paths from PROPFIND response: returns one percent-decoded entry per href that does not end with '/' (base_path is not used in the body) +static vector ParsePropfindResponse(const string &xml_response, const string &base_path) { + vector result; + + // Simple XML parsing - look for href opening tags (two tag spellings appear to be searched for) + // WebDAV PROPFIND responses contain response elements with href child elements; NOTE(review): if the first find() below fails, pos becomes npos, so the second find() starts at npos and cannot match + size_t pos = 0; + while ((pos = xml_response.find("", pos)) != string::npos || + (pos = xml_response.find("", pos)) != string::npos) { + + string tag_open = xml_response.substr(pos, 8) == "" ? "" : ""; + string tag_close = tag_open == "" ? 
"" : ""; + + size_t start = pos + tag_open.length(); + size_t end = xml_response.find(tag_close, start); + + if (end == string::npos) { + break; + } + + string href = xml_response.substr(start, end - start); + + // URL decode the href (NOTE(review): std::stoi throws if no hex digit can be parsed from the two characters after '%') + string decoded_href; + for (size_t i = 0; i < href.length(); i++) { + if (href[i] == '%' && i + 2 < href.length()) { + string hex = href.substr(i + 1, 2); + char ch = static_cast(std::stoi(hex, nullptr, 16)); + decoded_href += ch; + i += 2; + } else { + decoded_href += href[i]; + } + } + + // Skip collections: every href ending with / (the listed directory itself and any subdirectory) + if (!StringUtil::EndsWith(decoded_href, "/")) { + // The decoded href is stored unchanged; no host/port prefix is removed at this point + // WebDAV servers often return absolute paths like /path/to/file + OpenFileInfo info; + info.path = decoded_href; + result.push_back(info); + } + + pos = end + tag_close.length(); + } + + return result; +} + +// Pattern matching helper (similar to S3): walks path segments and pattern segments in step; a ** segment stands for zero or more path segments +static bool Match(vector::const_iterator key, vector::const_iterator key_end, + vector::const_iterator pattern, vector::const_iterator pattern_end) { + + while (key != key_end && pattern != pattern_end) { + if (*pattern == "**") { + if (std::next(pattern) == pattern_end) { + return true; + } + while (key != key_end) { + if (Match(key, key_end, std::next(pattern), pattern_end)) { + return true; + } + key++; + } + return false; + } + if (!Glob(key->data(), key->length(), pattern->data(), pattern->length())) { + return false; + } + key++; + pattern++; + } + return key == key_end && pattern == pattern_end; +} + +vector WebDAVFileSystem::Glob(const string &glob_pattern, FileOpener *opener) { + if (!opener) { + // Without an opener, we can't authenticate, so just return the pattern + return {glob_pattern}; + } + + // Parse the WebDAV URL + auto parsed_url = ParseUrl(glob_pattern); + string path = parsed_url.path; + + // Find the first wildcard character (only '*', '[' and backslash are searched for) + auto first_wildcard_pos = path.find_first_of("*[\\"); + if (first_wildcard_pos == 
string::npos) { + // No wildcards, return as-is + return {glob_pattern}; + } + + // Extract the shared prefix path (up to the last '/' before the wildcard) + auto last_slash_before_wildcard = path.rfind('/', first_wildcard_pos); + string prefix_path; + if (last_slash_before_wildcard != string::npos) { + prefix_path = path.substr(0, last_slash_before_wildcard + 1); + } else { + prefix_path = "/"; + } + + // Construct the base URL for listing + string list_url_pattern = parsed_url.http_proto + "://" + parsed_url.host + prefix_path; + + // Create a file handle for the PROPFIND request + // Use a non-wildcard path to avoid recursive file opening + FileOpenerInfo info; + string non_wildcard_path; + if (StringUtil::StartsWith(glob_pattern, "storagebox://")) { + // Extract the username from the original pattern (13 is the length of "storagebox://") + string remainder = glob_pattern.substr(13); + auto slash_pos = remainder.find('/'); + string username = remainder.substr(0, slash_pos); + non_wildcard_path = "storagebox://" + username + prefix_path; + } else if (StringUtil::StartsWith(glob_pattern, "webdav://")) { + non_wildcard_path = "webdav://" + parsed_url.host + prefix_path; + } else if (StringUtil::StartsWith(glob_pattern, "webdavs://")) { + non_wildcard_path = "webdavs://" + parsed_url.host + prefix_path; + } else { + non_wildcard_path = parsed_url.http_proto + "://" + parsed_url.host + prefix_path; + } + + info.file_path = non_wildcard_path; + + OpenFileInfo file_info; + file_info.path = non_wildcard_path; + + unique_ptr handle; + try { + auto base_handle = CreateHandle(file_info, FileOpenFlags::FILE_FLAGS_READ, opener); + handle = unique_ptr_cast(std::move(base_handle)); + handle->Initialize(opener); + } catch (HTTPException &e) { + // If we can't create a handle, return empty result + return {}; + } + + // Make PROPFIND request to list files + // Note: We use depth=1 and then list each immediate subdirectory separately (one extra level only, not a full recursion) + HTTPHeaders headers; + auto response = PropfindRequest(*handle, list_url_pattern, headers, 1); + 
+ // WebDAV PROPFIND should return 207 Multi-Status + // Some servers might return 200 OK + if (!response || + (response->status != HTTPStatusCode::MultiStatus_207 && response->status != HTTPStatusCode::OK_200)) { + // PROPFIND failed, return empty result + return {}; + } + + // Check if we got any response body + if (response->body.empty()) { + return {}; + } + + // Parse the XML response + auto files = ParsePropfindResponse(response->body, prefix_path); + string response_body = response->body; + + // With depth=1 only direct children are listed, so subdirectories have to be listed separately + // Collect all subdirectories from the response + vector subdirs; + size_t pos = 0; + while ((pos = response_body.find("", pos)) != string::npos || + (pos = response_body.find("", pos)) != string::npos) { + + string tag_open = response_body.substr(pos, 8) == "" ? "" : ""; + string tag_close = tag_open == "" ? "" : ""; + + size_t start = pos + tag_open.length(); + size_t end = response_body.find(tag_close, start); + + if (end == string::npos) { + break; + } + + string href = response_body.substr(start, end - start); + + // URL decode (same loop as in ParsePropfindResponse) + string decoded_href; + for (size_t i = 0; i < href.length(); i++) { + if (href[i] == '%' && i + 2 < href.length()) { + string hex = href.substr(i + 1, 2); + char ch = static_cast(std::stoi(hex, nullptr, 16)); + decoded_href += ch; + i += 2; + } else { + decoded_href += href[i]; + } + } + + // This is a directory if it ends with /; the listed prefix itself is excluded + if (StringUtil::EndsWith(decoded_href, "/") && decoded_href != prefix_path) { + string subdir_url = parsed_url.http_proto + "://" + parsed_url.host + decoded_href; + subdirs.push_back(subdir_url); + } + + pos = end + tag_close.length(); + } + + // List each collected subdirectory once (directories found inside them are not descended into) + for (const auto &subdir_url : subdirs) { + auto subdir_response = PropfindRequest(*handle, subdir_url, headers, 1); + if (subdir_response && (subdir_response->status == HTTPStatusCode::MultiStatus_207 || + subdir_response->status == HTTPStatusCode::OK_200)) { + auto 
subdir_files = ParsePropfindResponse(subdir_response->body, prefix_path); + files.insert(files.end(), subdir_files.begin(), subdir_files.end()); + } + } + + // Match the pattern against the file paths + vector pattern_splits = StringUtil::Split(path, "/"); + vector result; + + for (auto &file_info : files) { + // Extract the path component from the href + string file_path = file_info.path; + + // Remove any leading protocol/host if present (NOTE(review): the host string is searched for anywhere in the href, including inside the path) + size_t path_start = file_path.find(parsed_url.host); + if (path_start != string::npos) { + file_path = file_path.substr(path_start + parsed_url.host.length()); + } + + vector key_splits = StringUtil::Split(file_path, "/"); + bool is_match = Match(key_splits.begin(), key_splits.end(), pattern_splits.begin(), pattern_splits.end()); + + if (is_match) { + // Reconstruct the full URL with the original protocol + string full_url; + if (StringUtil::StartsWith(glob_pattern, "storagebox://")) { + // Extract the username from the original pattern + string remainder = glob_pattern.substr(13); + auto slash_pos = remainder.find('/'); + string username = remainder.substr(0, slash_pos); + full_url = "storagebox://" + username + file_path; + } else if (StringUtil::StartsWith(glob_pattern, "webdav://")) { + full_url = "webdav://" + parsed_url.host + file_path; + } else if (StringUtil::StartsWith(glob_pattern, "webdavs://")) { + full_url = "webdavs://" + parsed_url.host + file_path; + } else { + full_url = parsed_url.http_proto + "://" + parsed_url.host + file_path; + } + + file_info.path = full_url; + result.push_back(file_info); + } + } + + return result; +} + +bool WebDAVFileSystem::ListFiles(const string &directory, const std::function &callback, + FileOpener *opener) { + string trimmed_dir = directory; + // Remove trailing slash if present + if (StringUtil::EndsWith(trimmed_dir, "/")) { + trimmed_dir = trimmed_dir.substr(0, trimmed_dir.length() - 1); + } + + // Use Glob with ** pattern to list the files below the directory (NOTE(review): Glob lists the directory and its immediate subdirectories only) + auto glob_res = 
Glob(trimmed_dir + "/**", opener); + + if (glob_res.empty()) { + return false; + } + + for (const auto &file : glob_res) { + callback(file.path, false); + } + + return true; +} + +HTTPException WebDAVFileSystem::GetHTTPError(FileHandle &, const HTTPResponse &response, const string &url) { + auto status_message = HTTPUtil::GetStatusMessage(response.status); + string error = "WebDAV error on '" + url + "' (HTTP " + to_string(static_cast(response.status)) + " " + + status_message + ")"; + return HTTPException(response, error); +} + +} // namespace duckdb diff --git a/test/sql/secrets/create_secret_webdav.test b/test/sql/secrets/create_secret_webdav.test new file mode 100644 index 0000000..258bd32 --- /dev/null +++ b/test/sql/secrets/create_secret_webdav.test @@ -0,0 +1,48 @@ +# name: test/sql/secrets/create_secret_webdav.test +# description: Test WebDAV secret creation +# group: [secrets] + +# Require httpfs extension +require httpfs + +statement ok +CREATE SECRET webdav_test ( + TYPE WEBDAV, + username 'test_user', + password 'test_password', + SCOPE 'webdav://example.com' +); + +# Verify the secret exists +query I +SELECT count(*) FROM duckdb_secrets() WHERE name = 'webdav_test'; +---- +1 + +# Verify secret type +query I +SELECT type FROM duckdb_secrets() WHERE name = 'webdav_test'; +---- +webdav + +# Test storagebox:// scope matching +statement ok +CREATE SECRET storagebox_test ( + TYPE WEBDAV, + username 'u123456', + password 'secret_password', + SCOPE 'storagebox://u123456' +); + +# Verify storagebox secret exists +query I +SELECT count(*) FROM duckdb_secrets() WHERE name = 'storagebox_test'; +---- +1 + +# Cleanup +statement ok +DROP SECRET webdav_test; + +statement ok +DROP SECRET storagebox_test; diff --git a/test/sql/webdav/README.md b/test/sql/webdav/README.md new file mode 100644 index 0000000..b93717c --- /dev/null +++ b/test/sql/webdav/README.md @@ -0,0 +1,162 @@ +# WebDAV Tests + +This directory contains tests for the WebDAV filesystem implementation in 
the DuckDB httpfs extension. + +## Test Files + +- `webdav_docker_test.test` - Comprehensive WebDAV functionality test using a Docker test server + +## Running the Tests + +### Prerequisites + +- Docker and Docker Compose installed +- DuckDB httpfs extension built + +### Setup + +1. **Start the WebDAV test server:** + ```bash + ./scripts/run_webdav_test_server.sh + ``` + + This script will: + - Start a bytemark/webdav Docker container + - Create test directory structure with sample CSV files + - Set up test data for reading and globbing tests + - Configure authentication (username: `duckdb_webdav_user`, password: `duckdb_webdav_password`) + +2. **Set environment variables:** + ```bash + source ./scripts/set_webdav_test_server_variables.sh + ``` + + This sets: + - `WEBDAV_TEST_SERVER_AVAILABLE=1` + - `WEBDAV_TEST_USERNAME=duckdb_webdav_user` + - `WEBDAV_TEST_PASSWORD=duckdb_webdav_password` + - (the server's plain-HTTP endpoint, `http://localhost:9100`, is not exported as a variable) + - `WEBDAV_TEST_BASE_URL=webdav://localhost:9100` + +3. **Run the tests:** + ```bash + # Run all WebDAV tests with environment variables + WEBDAV_TEST_SERVER_AVAILABLE=1 \ + WEBDAV_TEST_USERNAME=duckdb_webdav_user \ + WEBDAV_TEST_PASSWORD=duckdb_webdav_password \ + WEBDAV_TEST_BASE_URL=webdav://localhost:9100 \ + build/debug/test/unittest test/sql/webdav/*.test + + # Or run a specific test + WEBDAV_TEST_SERVER_AVAILABLE=1 \ + WEBDAV_TEST_USERNAME=duckdb_webdav_user \ + WEBDAV_TEST_PASSWORD=duckdb_webdav_password \ + WEBDAV_TEST_BASE_URL=webdav://localhost:9100 \ + build/debug/test/unittest test/sql/webdav/webdav_docker_test.test + ``` + +4. 
**Stop the test server (when done):** + ```bash + ./scripts/stop_webdav_test_server.sh + ``` + +## What the Tests Cover + +The `webdav_docker_test.test` file tests the following functionality: + +### Authentication +- Creating WebDAV secrets with username/password +- Verifying secret creation in `duckdb_secrets()` + +### Reading Operations +- Reading simple text files from WebDAV +- Reading CSV files from WebDAV +- Reading CSV files from subdirectories + +### Globbing and Pattern Matching +- Non-recursive globbing with `*.csv` patterns +- Recursive globbing with `**/*.csv` patterns +- Hive-style partitioning with glob patterns +- Filtering data from hive-partitioned directories +- Complex glob patterns like `subdir*/*.csv` + +### Writing Operations +- Writing CSV files to WebDAV +- Writing to subdirectories +- Writing multiple files and reading them back with glob patterns + +### Integration Tests +- Reading all files after write operations +- Verifying file counts with glob patterns +- End-to-end workflow: create table → write to WebDAV → read back + +## Test Data Structure + +The test server creates the following directory structure: + +``` +/data/ +├── hello.txt # Simple text file +├── test-dir/ +│ ├── test1.csv # Sample CSV with 2 rows +│ ├── subdir1/ +│ │ └── test2.csv # Sample CSV with 2 rows +│ └── subdir2/ +│ └── test3.csv # Sample CSV with 2 rows +└── glob-test/ + ├── year=2023/ + │ └── data.csv # Hive-partitioned data + └── year=2024/ + └── data.csv # Hive-partitioned data +``` + +Additional files are created during the test execution to verify write operations. 
+ +## Docker Container Details + +- **Image**: `bytemark/webdav` +- **Port**: 9100 (maps to container port 80) +- **Authentication**: Basic HTTP authentication +- **Data Storage**: Ephemeral (inside container only, automatically cleaned on restart) +- **Note**: Port 9100 is used to avoid conflicts with other services + +## Troubleshooting + +### Container won't start +```bash +# Check if port 9100 is already in use +lsof -i :9100 + +# Check Docker logs +docker logs duckdb-webdav-webdav-1 +``` + +### Tests fail with authentication errors +Ensure the environment variables are set correctly: +```bash +source ./scripts/set_webdav_test_server_variables.sh +env | grep WEBDAV +``` + +### Tests fail with connection errors +Verify the WebDAV server is running: +```bash +curl -u duckdb_webdav_user:duckdb_webdav_password http://localhost:9100/ +``` + +### Clean slate restart +```bash +./scripts/stop_webdav_test_server.sh +./scripts/run_webdav_test_server.sh +source ./scripts/set_webdav_test_server_variables.sh +``` + +## Notes + +- The test server runs on `localhost:9100`, ensure this port is available +- Test data is stored ephemerally inside the container and is automatically cleaned up when the container is stopped +- The tests use the `webdav://` protocol scheme, which is handled by the WebDAV filesystem implementation +- All write operations during tests create actual files on the WebDAV server that can be inspected during the test run +- **Configuration**: The base URL is configurable via the `WEBDAV_TEST_BASE_URL` environment variable, allowing you to test against different WebDAV servers or ports +- **Colima users**: Port 9100 is used because Colima (Lima) requires specific port forwarding configuration. 
If using Docker Desktop, you can change the port in `scripts/webdav.yml` if needed +- **No cleanup needed**: Since no volumes are used, test data is automatically cleaned when the container restarts diff --git a/test/sql/webdav/webdav_docker_test.test b/test/sql/webdav/webdav_docker_test.test new file mode 100644 index 0000000..24854e6 --- /dev/null +++ b/test/sql/webdav/webdav_docker_test.test @@ -0,0 +1,159 @@ +# name: test/sql/webdav/webdav_docker_test.test +# description: Test WebDAV filesystem operations with Docker test server +# group: [webdav] + +require httpfs + +require-env WEBDAV_TEST_SERVER_AVAILABLE 1 + +require-env WEBDAV_TEST_USERNAME + +require-env WEBDAV_TEST_PASSWORD + +require-env WEBDAV_TEST_BASE_URL + +# Override default behaviour of skipping HTTP errors +set ignore_error_messages + +# Test 1: Create a WebDAV secret for authentication +statement ok +CREATE SECRET webdav_docker_test ( + TYPE WEBDAV, + USERNAME '${WEBDAV_TEST_USERNAME}', + PASSWORD '${WEBDAV_TEST_PASSWORD}', + SCOPE '${WEBDAV_TEST_BASE_URL}' +); + +# Verify the secret was created +query I +SELECT count(*) FROM duckdb_secrets() WHERE name = 'webdav_docker_test'; +---- +1 + +# Test 2: Read a simple text file from WebDAV +query I +SELECT content FROM read_text('${WEBDAV_TEST_BASE_URL}/hello.txt'); +---- +Hello from WebDAV + +# Test 3: Read CSV file from WebDAV +query III +SELECT * FROM read_csv_auto('${WEBDAV_TEST_BASE_URL}/test-dir/test1.csv'); +---- +1 Alice 100 +2 Bob 200 + +# Test 4: Read CSV files from subdirectory +query III +SELECT * FROM read_csv_auto('${WEBDAV_TEST_BASE_URL}/test-dir/subdir1/test2.csv'); +---- +3 Charlie 300 +4 Diana 400 + +# Test 5: Test globbing - read all CSV files in test-dir (non-recursive) +query III +SELECT * FROM read_csv_auto('${WEBDAV_TEST_BASE_URL}/test-dir/*.csv') ORDER BY id; +---- +1 Alice 100 +2 Bob 200 + +# Test 6: Test recursive globbing - read all CSV files in test-dir and subdirectories +query III +SELECT * FROM 
read_csv_auto('${WEBDAV_TEST_BASE_URL}/test-dir/**/*.csv') ORDER BY id; +---- +1 Alice 100 +2 Bob 200 +3 Charlie 300 +4 Diana 400 +5 Eve 500 +6 Frank 600 + +# Test 7: Test globbing with hive-style partitioning +query III +SELECT * FROM read_csv_auto('${WEBDAV_TEST_BASE_URL}/glob-test/*/*.csv', HIVE_PARTITIONING=1) ORDER BY id; +---- +1 2023 test2023 +2 2024 test2024 + +# Test 8: Test globbing with hive-style partitioning and filtering +query III +SELECT * FROM read_csv_auto('${WEBDAV_TEST_BASE_URL}/glob-test/*/*.csv', HIVE_PARTITIONING=1) WHERE year=2024; +---- +2 2024 test2024 + +# Test 9: Write a new CSV file to WebDAV +statement ok +CREATE TABLE test_write AS SELECT 7 as id, 'George' as name, 700 as value; + +statement ok +COPY test_write TO '${WEBDAV_TEST_BASE_URL}/test-dir/test_written.csv' (HEADER, DELIMITER ','); + +# Verify the written file can be read back +query III +SELECT * FROM read_csv_auto('${WEBDAV_TEST_BASE_URL}/test-dir/test_written.csv'); +---- +7 George 700 + +# Test 10: Write multiple files, then read them back with a glob pattern +statement ok +CREATE TABLE test_batch1 AS SELECT 10 as id, 'Harry' as name, 1000 as value; + +statement ok +CREATE TABLE test_batch2 AS SELECT 20 as id, 'Iris' as name, 2000 as value; + +statement ok +COPY test_batch1 TO '${WEBDAV_TEST_BASE_URL}/test-dir/upload-dir/batch1.csv' (HEADER, DELIMITER ','); + +statement ok +COPY test_batch2 TO '${WEBDAV_TEST_BASE_URL}/test-dir/upload-dir/batch2.csv' (HEADER, DELIMITER ','); + +# Verify both files can be read with glob pattern +query III +SELECT * FROM read_csv_auto('${WEBDAV_TEST_BASE_URL}/test-dir/upload-dir/batch*.csv') ORDER BY id; +---- +10 Harry 1000 +20 Iris 2000 + +# Test 11: Test reading from subdirectories with complex glob patterns +query III +SELECT * FROM read_csv_auto('${WEBDAV_TEST_BASE_URL}/test-dir/subdir*/*.csv') ORDER BY id; +---- +3 Charlie 300 +4 Diana 400 +5 Eve 500 +6 Frank 600 + +# Test 12: Write to a subdirectory +statement ok +CREATE TABLE test_subdir AS SELECT 99 as id, 
'Zara' as name, 9900 as value; + +statement ok +COPY test_subdir TO '${WEBDAV_TEST_BASE_URL}/test-dir/subdir1/test_in_subdir.csv' (HEADER, DELIMITER ','); + +# Verify the file in subdirectory +query III +SELECT * FROM read_csv_auto('${WEBDAV_TEST_BASE_URL}/test-dir/subdir1/test_in_subdir.csv'); +---- +99 Zara 9900 + +# Test 13: Test glob with all files including the newly written ones +query I +SELECT count(*) FROM read_csv_auto('${WEBDAV_TEST_BASE_URL}/test-dir/**/*.csv'); +---- +10 + +# Cleanup +statement ok +DROP TABLE test_write; + +statement ok +DROP TABLE test_batch1; + +statement ok +DROP TABLE test_batch2; + +statement ok +DROP TABLE test_subdir; + +statement ok +DROP SECRET webdav_docker_test;