From 8d8999d2e7ea84b62dd075f975a39b4156d71d82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Brynjar=20Magn=C3=BAsson?=
Date: Thu, 9 Oct 2025 21:01:18 +0000
Subject: [PATCH 1/2] Fix S3 COPY CSV FILE_SIZE_BYTES

---
 src/s3fs.cpp                                 |  1 +
 test/sql/copy/s3/csv_s3_file_size_bytes.test | 46 ++++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 test/sql/copy/s3/csv_s3_file_size_bytes.test

diff --git a/src/s3fs.cpp b/src/s3fs.cpp
index 2a92f22..88f7575 100644
--- a/src/s3fs.cpp
+++ b/src/s3fs.cpp
@@ -1003,6 +1003,7 @@ void S3FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx
 			FlushBuffer(s3fh, write_buffer);
 		}
 		s3fh.file_offset += bytes_to_write;
+		s3fh.length += bytes_to_write;
 		bytes_written += bytes_to_write;
 	}
 
diff --git a/test/sql/copy/s3/csv_s3_file_size_bytes.test b/test/sql/copy/s3/csv_s3_file_size_bytes.test
new file mode 100644
index 0000000..b1c6415
--- /dev/null
+++ b/test/sql/copy/s3/csv_s3_file_size_bytes.test
@@ -0,0 +1,46 @@
+# name: test/sql/copy/s3/csv_s3_file_size_bytes.test
+# description: Test FILE_SIZE_BYTES parameter for csv copy over s3
+# group: [s3]
+
+require httpfs
+
+require-env S3_TEST_SERVER_AVAILABLE 1
+
+# Require that these environment variables are also set
+
+require-env AWS_DEFAULT_REGION
+
+require-env AWS_ACCESS_KEY_ID
+
+require-env AWS_SECRET_ACCESS_KEY
+
+require-env DUCKDB_S3_ENDPOINT
+
+require-env DUCKDB_S3_USE_SSL
+
+# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues
+set ignore_error_messages
+
+# different vector sizes result in a different number of files
+require no_vector_verification
+
+statement ok
+CREATE TABLE bigdata AS SELECT i AS col_a, i AS col_b FROM range(0,10000) tbl(i);
+
+statement ok
+set threads=1
+
+# parameter in bytes
+statement ok
+COPY (FROM bigdata) TO 's3://test-bucket/file_size_bytes_csv1' (FORMAT CSV, FILE_SIZE_BYTES 1000);
+
+query I
+SELECT COUNT(*) FROM read_csv_auto('s3://test-bucket/file_size_bytes_csv1/*.csv')
+----
+10000
+
+# should lead to 3 files
+query I
+SELECT count(*) FROM glob('s3://test-bucket/file_size_bytes_csv1/*.csv')
+----
+3
\ No newline at end of file

From 7262e98627abf90d2c37f8f2edaee6f176618456 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Brynjar=20Magn=C3=BAsson?=
Date: Thu, 9 Oct 2025 21:02:58 +0000
Subject: [PATCH 2/2] Fix S3 COPY CSV FILE_SIZE_BYTES

---
 test/sql/copy/s3/csv_s3_file_size_bytes.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/sql/copy/s3/csv_s3_file_size_bytes.test b/test/sql/copy/s3/csv_s3_file_size_bytes.test
index b1c6415..cd730f5 100644
--- a/test/sql/copy/s3/csv_s3_file_size_bytes.test
+++ b/test/sql/copy/s3/csv_s3_file_size_bytes.test
@@ -43,4 +43,4 @@ SELECT COUNT(*) FROM read_csv_auto('s3://test-bucket/file_size_bytes_csv1/*.csv
 query I
 SELECT count(*) FROM glob('s3://test-bucket/file_size_bytes_csv1/*.csv')
 ----
-3
\ No newline at end of file
+3
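
For context on why the one-line change in src/s3fs.cpp is the fix: FILE_SIZE_BYTES rotation keys off the size reported for the currently open file handle, so if the S3 write path advances only file_offset while the handle's length stays at zero, the threshold is never crossed and every row lands in a single object. Keeping length in step with the bytes written lets the size check fire and the COPY split into multiple files, which is what the new test asserts. The sketch below is a simplified, self-contained model of that mechanism, assuming the rotation check reads the handle's reported length; ToyFileHandle, Write, GetFileSize, the batch loop, and all the byte counts are illustrative stand-ins, not DuckDB's actual classes or API.

```cpp
// Simplified model of size-based file rotation as used by FILE_SIZE_BYTES.
// All names and numbers here are illustrative; the point is that rotation
// can only trigger if the handle's reported size grows as bytes are written.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct ToyFileHandle {
	std::string path;
	uint64_t file_offset = 0; // where the next write lands
	uint64_t length = 0;      // size reported back to the copy operator
};

// Mirrors the shape of the patched write path: advance both offset and length.
void Write(ToyFileHandle &handle, uint64_t nr_bytes) {
	handle.file_offset += nr_bytes;
	handle.length += nr_bytes; // without this line, GetFileSize() never grows
}

uint64_t GetFileSize(const ToyFileHandle &handle) {
	return handle.length;
}

int main() {
	const uint64_t file_size_bytes = 1000; // rotation threshold, like FILE_SIZE_BYTES 1000
	const uint64_t chunk = 400;            // pretend each batch of rows serializes to 400 bytes
	std::vector<ToyFileHandle> files;
	files.push_back({"part-0.csv"});

	for (int batch = 0; batch < 8; batch++) {
		Write(files.back(), chunk);
		// After each batch, check the current file's size and rotate to a
		// fresh file once the threshold has been reached.
		if (GetFileSize(files.back()) >= file_size_bytes) {
			files.push_back({"part-" + std::to_string(files.size()) + ".csv"});
		}
	}

	// With these illustrative numbers (8 batches of 400 bytes, 1000-byte
	// threshold), the loop rotates twice and prints 3.
	std::cout << "wrote " << files.size() << " files\n";
	return 0;
}
```

If the `handle.length += nr_bytes;` line is removed, GetFileSize always returns 0, the rotation branch never runs, and the loop ends with a single ever-growing file; that is the behaviour the test's `glob(...)` count of 3 files guards against.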