From ee6279f08afb2328584b40772caf6c961973f17c Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Mon, 10 Nov 2025 10:45:23 +0100 Subject: [PATCH 1/6] Add interval array --- CMakeLists.txt | 3 +- .../deserialize_interval_array.hpp | 71 +++++++++++++++++++ src/deserialize.cpp | 48 ++++++++++++- tests/test_de_serialization_with_files.cpp | 1 + 4 files changed, 121 insertions(+), 2 deletions(-) create mode 100644 include/sparrow_ipc/deserialize_interval_array.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 32cd1bd1..0c3cca4d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -120,10 +120,11 @@ set(SPARROW_IPC_HEADERS ${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/arrow_interface/arrow_schema/private_data.hpp ${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/chunk_memory_output_stream.hpp ${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/chunk_memory_serializer.hpp + ${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/compression.hpp ${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/config/config.hpp ${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/config/sparrow_ipc_version.hpp - ${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/compression.hpp ${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_fixedsizebinary_array.hpp + ${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_interval_array.hpp ${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_primitive_array.hpp ${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_utils.hpp ${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_variable_size_binary_array.hpp diff --git a/include/sparrow_ipc/deserialize_interval_array.hpp b/include/sparrow_ipc/deserialize_interval_array.hpp new file mode 100644 index 00000000..b8fc7252 --- /dev/null +++ b/include/sparrow_ipc/deserialize_interval_array.hpp @@ -0,0 +1,71 @@ +#pragma once + +#include +#include + +#include +#include + +#include "Message_generated.h" +#include "sparrow_ipc/arrow_interface/arrow_array.hpp" +#include "sparrow_ipc/arrow_interface/arrow_schema.hpp" +#include "sparrow_ipc/deserialize_utils.hpp" + +namespace sparrow_ipc +{ + template + [[nodiscard]] sparrow::interval_array deserialize_non_owning_interval_array( + const org::apache::arrow::flatbuf::RecordBatch& record_batch, + std::span body, + std::string_view name, + const std::optional>& metadata, + size_t& buffer_index + ) + { + const std::string_view format = data_type_to_format( + sparrow::detail::get_data_type_from_array>::get() + ); + ArrowSchema schema = make_non_owning_arrow_schema( + format, + name.data(), + metadata, + std::nullopt, + 0, + nullptr, + nullptr + ); + + const auto compression = record_batch.compression(); + std::vector buffers; + + auto validity_buffer_span = utils::get_buffer(record_batch, body, buffer_index); + auto data_buffer_span = utils::get_buffer(record_batch, body, buffer_index); + + if (compression) + { + buffers.push_back(utils::get_decompressed_buffer(validity_buffer_span, compression)); + buffers.push_back(utils::get_decompressed_buffer(data_buffer_span, compression)); + } + else + { + buffers.emplace_back(validity_buffer_span); + buffers.emplace_back(data_buffer_span); + } + + // TODO bitmap_ptr is not used anymore... Leave it for now, and remove later if no need confirmed + const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(validity_buffer_span, record_batch.length()); + + ArrowArray array = make_arrow_array( + record_batch.length(), + null_count, + 0, + 0, + nullptr, + nullptr, + std::move(buffers) + ); + + sparrow::arrow_proxy ap{std::move(array), std::move(schema)}; + return sparrow::interval_array{std::move(ap)}; + } +} diff --git a/src/deserialize.cpp b/src/deserialize.cpp index 92063de1..d2e0b006 100644 --- a/src/deserialize.cpp +++ b/src/deserialize.cpp @@ -3,6 +3,7 @@ #include #include "sparrow_ipc/deserialize_fixedsizebinary_array.hpp" +#include "sparrow_ipc/deserialize_interval_array.hpp" #include "sparrow_ipc/deserialize_primitive_array.hpp" #include "sparrow_ipc/deserialize_variable_size_binary_array.hpp" #include "sparrow_ipc/encapsulated_message.hpp" @@ -205,6 +206,50 @@ namespace sparrow_ipc ) ); break; + case org::apache::arrow::flatbuf::Type::Interval: + { + const auto interval_type = field->type_as_Interval(); + org::apache::arrow::flatbuf::IntervalUnit interval_unit = interval_type->unit(); + switch (interval_unit) + { + case org::apache::arrow::flatbuf::IntervalUnit::YEAR_MONTH: + arrays.emplace_back( + deserialize_non_owning_interval_array( + record_batch, + encapsulated_message.body(), + name, + metadata, + buffer_index + ) + ); + break; + case org::apache::arrow::flatbuf::IntervalUnit::DAY_TIME: + arrays.emplace_back( + deserialize_non_owning_interval_array( + record_batch, + encapsulated_message.body(), + name, + metadata, + buffer_index + ) + ); + break; + case org::apache::arrow::flatbuf::IntervalUnit::MONTH_DAY_NANO: + arrays.emplace_back( + deserialize_non_owning_interval_array( + record_batch, + encapsulated_message.body(), + name, + metadata, + buffer_index + ) + ); + break; + default: + throw std::runtime_error("Unsupported interval unit."); + } + } + break; default: throw std::runtime_error("Unsupported type."); } @@ -222,7 +267,8 @@ namespace sparrow_ipc std::vector>> fields_metadata; do { - // Check for end-of-stream marker here as data could contain only that (if no record batches present/written) + // Check for end-of-stream marker here as data could contain only that (if no record batches + // present/written) if (data.size() >= 8 && is_end_of_stream(data.subspan(0, 8))) { break; diff --git a/tests/test_de_serialization_with_files.cpp b/tests/test_de_serialization_with_files.cpp index 8cb74e8f..0805508c 100644 --- a/tests/test_de_serialization_with_files.cpp +++ b/tests/test_de_serialization_with_files.cpp @@ -33,6 +33,7 @@ const std::vector files_paths_to_test = { tests_resources_files_path / "generated_large_binary", tests_resources_files_path / "generated_binary_zerolength", tests_resources_files_path / "generated_binary_no_batches", + tests_resources_files_path / "generated_interval", }; const std::vector files_paths_to_test_with_lz4_compression = { From a3ea35309fb8c92302427b6ae1aacfaa0aaefaa5 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Mon, 10 Nov 2025 10:52:31 +0100 Subject: [PATCH 2/6] wip --- src/deserialize.cpp | 97 +++++++++++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/src/deserialize.cpp b/src/deserialize.cpp index d2e0b006..8fa63635 100644 --- a/src/deserialize.cpp +++ b/src/deserialize.cpp @@ -12,11 +12,23 @@ namespace sparrow_ipc { + namespace + { + // Integer bit width constants + constexpr int32_t BIT_WIDTH_8 = 8; + constexpr int32_t BIT_WIDTH_16 = 16; + constexpr int32_t BIT_WIDTH_32 = 32; + constexpr int32_t BIT_WIDTH_64 = 64; + + // End-of-stream marker size in bytes + constexpr size_t END_OF_STREAM_MARKER_SIZE = 8; + } const org::apache::arrow::flatbuf::RecordBatch* deserialize_record_batch_message(std::span data, size_t& current_offset) { current_offset += sizeof(uint32_t); - const auto batch_message = org::apache::arrow::flatbuf::GetMessage(data.data() + current_offset); + const auto message_data = data.subspan(current_offset); + const auto* batch_message = org::apache::arrow::flatbuf::GetMessage(message_data.data()); if (batch_message->header_type() != org::apache::arrow::flatbuf::MessageHeader::RecordBatch) { throw std::runtime_error("Expected RecordBatch message, but got a different type."); @@ -29,21 +41,21 @@ namespace sparrow_ipc * * This function processes each field in the schema and deserializes the corresponding * data from the RecordBatch into sparrow::array objects. It handles various Arrow data - * types including primitive types (bool, integers, floating point), binary data, and - * string data with their respective size variants. + * types including primitive types (bool, integers, floating point), binary data, string + * data, fixed-size binary data, and interval types. * * @param record_batch The Apache Arrow FlatBuffer RecordBatch containing the serialized data * @param schema The Apache Arrow FlatBuffer Schema defining the structure and types of the data * @param encapsulated_message The message containing the binary data buffers - * @param field_metadata Metadata for each field + * @param field_metadata Metadata associated with each field in the schema * * @return std::vector A vector of deserialized arrays, one for each field in the schema * - * @throws std::runtime_error If an unsupported data type, integer bit width, or floating point precision - * is encountered + * @throws std::runtime_error If an unsupported data type, integer bit width, floating point precision, + * or interval unit is encountered * - * The function maintains a buffer index that is incremented as it processes each field - * to correctly map data buffers to their corresponding arrays. + * @note The function maintains a buffer index that is incremented as it processes each field + * to correctly map data buffers to their corresponding arrays. */ std::vector get_arrays_from_record_batch( const org::apache::arrow::flatbuf::RecordBatch& record_batch, @@ -70,7 +82,7 @@ namespace sparrow_ipc const std::string name = field->name() == nullptr ? "" : field->name()->str(); const bool nullable = field->nullable(); const auto field_type = field->type_type(); - // TODO rename all the deserialize_non_owning... fcts since this is not correct anymore + const auto deserialize_non_owning_primitive_array_lambda = [&]() { return deserialize_non_owning_primitive_array( @@ -91,7 +103,7 @@ namespace sparrow_ipc break; case org::apache::arrow::flatbuf::Type::Int: { - const auto int_type = field->type_as_Int(); + const auto* int_type = field->type_as_Int(); const auto bit_width = int_type->bitWidth(); const bool is_signed = int_type->is_signed(); @@ -100,11 +112,11 @@ namespace sparrow_ipc switch (bit_width) { // clang-format off - case 8: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; - case 16: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; - case 32: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; - case 64: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; - default: throw std::runtime_error("Unsupported integer bit width."); + case BIT_WIDTH_8: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; + case BIT_WIDTH_16: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; + case BIT_WIDTH_32: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; + case BIT_WIDTH_64: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; + default: throw std::runtime_error("Unsupported integer bit width: " + std::to_string(bit_width)); // clang-format on } } @@ -113,11 +125,11 @@ namespace sparrow_ipc switch (bit_width) { // clang-format off - case 8: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; - case 16: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; - case 32: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; - case 64: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; - default: throw std::runtime_error("Unsupported integer bit width."); + case BIT_WIDTH_8: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; + case BIT_WIDTH_16: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; + case BIT_WIDTH_32: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; + case BIT_WIDTH_64: arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; + default: throw std::runtime_error("Unsupported integer bit width: " + std::to_string(bit_width)); // clang-format on } } @@ -125,7 +137,7 @@ namespace sparrow_ipc break; case org::apache::arrow::flatbuf::Type::FloatingPoint: { - const auto float_type = field->type_as_FloatingPoint(); + const auto* float_type = field->type_as_FloatingPoint(); switch (float_type->precision()) { // clang-format off @@ -139,14 +151,17 @@ namespace sparrow_ipc arrays.emplace_back(deserialize_non_owning_primitive_array_lambda.template operator()()); break; default: - throw std::runtime_error("Unsupported floating point precision."); + throw std::runtime_error( + "Unsupported floating point precision: " + + std::to_string(static_cast(float_type->precision())) + ); // clang-format on } break; } case org::apache::arrow::flatbuf::Type::FixedSizeBinary: { - const auto fixed_size_binary_field = field->type_as_FixedSizeBinary(); + const auto* fixed_size_binary_field = field->type_as_FixedSizeBinary(); arrays.emplace_back(deserialize_non_owning_fixedwidthbinary( record_batch, encapsulated_message.body(), @@ -208,8 +223,8 @@ namespace sparrow_ipc break; case org::apache::arrow::flatbuf::Type::Interval: { - const auto interval_type = field->type_as_Interval(); - org::apache::arrow::flatbuf::IntervalUnit interval_unit = interval_type->unit(); + const auto* interval_type = field->type_as_Interval(); + const org::apache::arrow::flatbuf::IntervalUnit interval_unit = interval_type->unit(); switch (interval_unit) { case org::apache::arrow::flatbuf::IntervalUnit::YEAR_MONTH: @@ -246,12 +261,18 @@ namespace sparrow_ipc ); break; default: - throw std::runtime_error("Unsupported interval unit."); + throw std::runtime_error( + "Unsupported interval unit: " + + std::to_string(static_cast(interval_unit)) + ); } } break; default: - throw std::runtime_error("Unsupported type."); + throw std::runtime_error( + "Unsupported field type: " + std::to_string(static_cast(field_type)) + + " for field '" + name + "'" + ); } } return arrays; @@ -265,11 +286,12 @@ namespace sparrow_ipc std::vector fields_nullable; std::vector field_types; std::vector>> fields_metadata; - do + + while (!data.empty()) { - // Check for end-of-stream marker here as data could contain only that (if no record batches - // present/written) - if (data.size() >= 8 && is_end_of_stream(data.subspan(0, 8))) + // Check for end-of-stream marker + if (data.size() >= END_OF_STREAM_MARKER_SIZE + && is_end_of_stream(data.subspan(0, END_OF_STREAM_MARKER_SIZE))) { break; } @@ -322,12 +344,12 @@ namespace sparrow_ipc { if (schema == nullptr) { - throw std::runtime_error("Schema message is missing."); + throw std::runtime_error("RecordBatch encountered before Schema message."); } - const auto record_batch = message->header_as_RecordBatch(); + const auto* record_batch = message->header_as_RecordBatch(); if (record_batch == nullptr) { - throw std::runtime_error("RecordBatch message is missing."); + throw std::runtime_error("RecordBatch message header is null."); } std::vector arrays = get_arrays_from_record_batch( *record_batch, @@ -335,8 +357,7 @@ namespace sparrow_ipc encapsulated_message, fields_metadata ); - auto names_copy = field_names; // TODO: Remove when issue with the to_vector of - // record_batch is fixed + auto names_copy = field_names; sparrow::record_batch sp_record_batch(std::move(names_copy), std::move(arrays)); record_batches.emplace_back(std::move(sp_record_batch)); } @@ -344,12 +365,12 @@ namespace sparrow_ipc case org::apache::arrow::flatbuf::MessageHeader::Tensor: case org::apache::arrow::flatbuf::MessageHeader::DictionaryBatch: case org::apache::arrow::flatbuf::MessageHeader::SparseTensor: - throw std::runtime_error("Not supported"); + throw std::runtime_error("Unsupported message type: Tensor, DictionaryBatch, or SparseTensor"); default: throw std::runtime_error("Unknown message header type."); } data = rest; - } while (!data.empty()); + } return record_batches; } } From 5378771300ad306beb5ff5ca94fe2e2bfc256502 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Mon, 8 Dec 2025 13:13:19 +0100 Subject: [PATCH 3/6] wip --- include/sparrow_ipc/deserialize_interval_array.hpp | 11 ++++++++++- src/deserialize.cpp | 3 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/sparrow_ipc/deserialize_interval_array.hpp b/include/sparrow_ipc/deserialize_interval_array.hpp index b8fc7252..fd09c82c 100644 --- a/include/sparrow_ipc/deserialize_interval_array.hpp +++ b/include/sparrow_ipc/deserialize_interval_array.hpp @@ -19,17 +19,26 @@ namespace sparrow_ipc std::span body, std::string_view name, const std::optional>& metadata, + bool nullable, size_t& buffer_index ) { const std::string_view format = data_type_to_format( sparrow::detail::get_data_type_from_array>::get() ); + + // Set up flags based on nullable + std::optional> flags; + if (nullable) + { + flags = std::unordered_set{sparrow::ArrowFlag::NULLABLE}; + } + ArrowSchema schema = make_non_owning_arrow_schema( format, name.data(), metadata, - std::nullopt, + flags, 0, nullptr, nullptr diff --git a/src/deserialize.cpp b/src/deserialize.cpp index 8fa63635..f9de3c69 100644 --- a/src/deserialize.cpp +++ b/src/deserialize.cpp @@ -234,6 +234,7 @@ namespace sparrow_ipc encapsulated_message.body(), name, metadata, + nullable, buffer_index ) ); @@ -245,6 +246,7 @@ namespace sparrow_ipc encapsulated_message.body(), name, metadata, + nullable, buffer_index ) ); @@ -256,6 +258,7 @@ namespace sparrow_ipc encapsulated_message.body(), name, metadata, + nullable, buffer_index ) ); From 6b65ecc8e2ce442c7d5fcd049501339b225a74de Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 12 Dec 2025 13:46:37 +0100 Subject: [PATCH 4/6] Avoid code duplication --- .../sparrow_ipc/deserialize_array_impl.hpp | 98 +++++++++++++++++++ .../deserialize_interval_array.hpp | 64 ++---------- .../deserialize_primitive_array.hpp | 62 ++---------- 3 files changed, 112 insertions(+), 112 deletions(-) create mode 100644 include/sparrow_ipc/deserialize_array_impl.hpp diff --git a/include/sparrow_ipc/deserialize_array_impl.hpp b/include/sparrow_ipc/deserialize_array_impl.hpp new file mode 100644 index 00000000..1d359f9f --- /dev/null +++ b/include/sparrow_ipc/deserialize_array_impl.hpp @@ -0,0 +1,98 @@ +#pragma once + +#include +#include +#include + +#include + +#include "Message_generated.h" +#include "sparrow_ipc/arrow_interface/arrow_array.hpp" +#include "sparrow_ipc/arrow_interface/arrow_schema.hpp" +#include "sparrow_ipc/deserialize_utils.hpp" + +namespace sparrow_ipc::detail +{ + /** + * @brief Generic implementation for deserializing non-owning arrays with simple layout. + * + * This function provides the common deserialization logic for array types that have + * a validity buffer and a single data buffer (e.g., primitive_array, interval_array). + * + * @tparam ArrayType The array type template (e.g., sparrow::primitive_array) + * @tparam T The element type + * + * @param record_batch The FlatBuffer RecordBatch containing metadata + * @param body The raw buffer data + * @param name The array column name + * @param metadata Optional metadata pairs + * @param nullable Whether the array is nullable + * @param buffer_index The current buffer index (incremented by this function) + * + * @return The deserialized array of type ArrayType + */ + template class ArrayType, typename T> + [[nodiscard]] ArrayType deserialize_non_owning_simple_array( + const org::apache::arrow::flatbuf::RecordBatch& record_batch, + std::span body, + std::string_view name, + const std::optional>& metadata, + bool nullable, + size_t& buffer_index + ) + { + const std::string_view format = data_type_to_format( + sparrow::detail::get_data_type_from_array>::get() + ); + + // Set up flags based on nullable + std::optional> flags; + if (nullable) + { + flags = std::unordered_set{sparrow::ArrowFlag::NULLABLE}; + } + + ArrowSchema schema = make_non_owning_arrow_schema( + format, + name.data(), + metadata, + flags, + 0, + nullptr, + nullptr + ); + + const auto compression = record_batch.compression(); + std::vector buffers; + + auto validity_buffer_span = utils::get_buffer(record_batch, body, buffer_index); + auto data_buffer_span = utils::get_buffer(record_batch, body, buffer_index); + + if (compression) + { + buffers.push_back(utils::get_decompressed_buffer(validity_buffer_span, compression)); + buffers.push_back(utils::get_decompressed_buffer(data_buffer_span, compression)); + } + else + { + buffers.emplace_back(validity_buffer_span); + buffers.emplace_back(data_buffer_span); + } + + // TODO bitmap_ptr is not used anymore... Leave it for now, and remove later if no need confirmed + const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(validity_buffer_span, record_batch.length()); + + ArrowArray array = make_arrow_array( + record_batch.length(), + null_count, + 0, + 0, + nullptr, + nullptr, + std::move(buffers) + ); + + sparrow::arrow_proxy ap{std::move(array), std::move(schema)}; + return ArrayType{std::move(ap)}; + } +} diff --git a/include/sparrow_ipc/deserialize_interval_array.hpp b/include/sparrow_ipc/deserialize_interval_array.hpp index fd09c82c..6cf6b23c 100644 --- a/include/sparrow_ipc/deserialize_interval_array.hpp +++ b/include/sparrow_ipc/deserialize_interval_array.hpp @@ -1,15 +1,10 @@ #pragma once -#include -#include - #include #include #include "Message_generated.h" -#include "sparrow_ipc/arrow_interface/arrow_array.hpp" -#include "sparrow_ipc/arrow_interface/arrow_schema.hpp" -#include "sparrow_ipc/deserialize_utils.hpp" +#include "sparrow_ipc/deserialize_array_impl.hpp" namespace sparrow_ipc { @@ -23,58 +18,13 @@ namespace sparrow_ipc size_t& buffer_index ) { - const std::string_view format = data_type_to_format( - sparrow::detail::get_data_type_from_array>::get() - ); - - // Set up flags based on nullable - std::optional> flags; - if (nullable) - { - flags = std::unordered_set{sparrow::ArrowFlag::NULLABLE}; - } - - ArrowSchema schema = make_non_owning_arrow_schema( - format, - name.data(), + return detail::deserialize_non_owning_simple_array( + record_batch, + body, + name, metadata, - flags, - 0, - nullptr, - nullptr + nullable, + buffer_index ); - - const auto compression = record_batch.compression(); - std::vector buffers; - - auto validity_buffer_span = utils::get_buffer(record_batch, body, buffer_index); - auto data_buffer_span = utils::get_buffer(record_batch, body, buffer_index); - - if (compression) - { - buffers.push_back(utils::get_decompressed_buffer(validity_buffer_span, compression)); - buffers.push_back(utils::get_decompressed_buffer(data_buffer_span, compression)); - } - else - { - buffers.emplace_back(validity_buffer_span); - buffers.emplace_back(data_buffer_span); - } - - // TODO bitmap_ptr is not used anymore... Leave it for now, and remove later if no need confirmed - const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(validity_buffer_span, record_batch.length()); - - ArrowArray array = make_arrow_array( - record_batch.length(), - null_count, - 0, - 0, - nullptr, - nullptr, - std::move(buffers) - ); - - sparrow::arrow_proxy ap{std::move(array), std::move(schema)}; - return sparrow::interval_array{std::move(ap)}; } } diff --git a/include/sparrow_ipc/deserialize_primitive_array.hpp b/include/sparrow_ipc/deserialize_primitive_array.hpp index 01949ac8..6904f036 100644 --- a/include/sparrow_ipc/deserialize_primitive_array.hpp +++ b/include/sparrow_ipc/deserialize_primitive_array.hpp @@ -1,16 +1,13 @@ #pragma once #include -#include #include #include #include #include "Message_generated.h" -#include "sparrow_ipc/arrow_interface/arrow_array.hpp" -#include "sparrow_ipc/arrow_interface/arrow_schema.hpp" -#include "sparrow_ipc/deserialize_utils.hpp" +#include "sparrow_ipc/deserialize_array_impl.hpp" namespace sparrow_ipc { @@ -24,58 +21,13 @@ namespace sparrow_ipc size_t& buffer_index ) { - const std::string_view format = data_type_to_format( - sparrow::detail::get_data_type_from_array>::get() - ); - - // Set up flags based on nullable - std::optional> flags; - if (nullable) - { - flags = std::unordered_set{sparrow::ArrowFlag::NULLABLE}; - } - - ArrowSchema schema = make_non_owning_arrow_schema( - format, - name.data(), + return detail::deserialize_non_owning_simple_array( + record_batch, + body, + name, metadata, - flags, - 0, - nullptr, - nullptr - ); - - const auto compression = record_batch.compression(); - std::vector buffers; - - auto validity_buffer_span = utils::get_buffer(record_batch, body, buffer_index); - auto data_buffer_span = utils::get_buffer(record_batch, body, buffer_index); - - if (compression) - { - buffers.push_back(utils::get_decompressed_buffer(validity_buffer_span, compression)); - buffers.push_back(utils::get_decompressed_buffer(data_buffer_span, compression)); - } - else - { - buffers.push_back(validity_buffer_span); - buffers.push_back(data_buffer_span); - } - - // TODO bitmap_ptr is not used anymore... Leave it for now, and remove later if no need confirmed - const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(validity_buffer_span, record_batch.length()); - - ArrowArray array = make_arrow_array( - record_batch.length(), - null_count, - 0, - 0, - nullptr, - nullptr, - std::move(buffers) + nullable, + buffer_index ); - - sparrow::arrow_proxy ap{std::move(array), std::move(schema)}; - return sparrow::primitive_array{std::move(ap)}; } } From ad9666db0c3f55a1b67c43c6385e1fe3b9e8dd44 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 12 Dec 2025 14:56:46 +0100 Subject: [PATCH 5/6] try fix --- include/sparrow_ipc/deserialize_primitive_array.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sparrow_ipc/deserialize_primitive_array.hpp b/include/sparrow_ipc/deserialize_primitive_array.hpp index 6904f036..15b2a3d0 100644 --- a/include/sparrow_ipc/deserialize_primitive_array.hpp +++ b/include/sparrow_ipc/deserialize_primitive_array.hpp @@ -21,7 +21,7 @@ namespace sparrow_ipc size_t& buffer_index ) { - return detail::deserialize_non_owning_simple_array( + return sparrow_ipc::detail::deserialize_non_owning_simple_array( record_batch, body, name, From bb061524d8a9bf57f2a49753eeee77edad46a172 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 12 Dec 2025 15:29:59 +0100 Subject: [PATCH 6/6] try fix --- include/sparrow_ipc/deserialize_array_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sparrow_ipc/deserialize_array_impl.hpp b/include/sparrow_ipc/deserialize_array_impl.hpp index 1d359f9f..ee781223 100644 --- a/include/sparrow_ipc/deserialize_array_impl.hpp +++ b/include/sparrow_ipc/deserialize_array_impl.hpp @@ -31,7 +31,7 @@ namespace sparrow_ipc::detail * * @return The deserialized array of type ArrayType */ - template class ArrayType, typename T> + template class ArrayType, typename T> [[nodiscard]] ArrayType deserialize_non_owning_simple_array( const org::apache::arrow::flatbuf::RecordBatch& record_batch, std::span body,