Skip to content

Commit 6164a91

Browse files
authored
Add interval array (#60)
1 parent fd49f83 commit 6164a91

File tree

6 files changed

+242
-89
lines changed

6 files changed

+242
-89
lines changed

CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,12 @@ set(SPARROW_IPC_HEADERS
120120
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/arrow_interface/arrow_schema/private_data.hpp
121121
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/chunk_memory_output_stream.hpp
122122
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/chunk_memory_serializer.hpp
123+
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/compression.hpp
123124
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/config/config.hpp
124125
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/config/sparrow_ipc_version.hpp
125-
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/compression.hpp
126+
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_array_impl.hpp
126127
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_fixedsizebinary_array.hpp
128+
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_interval_array.hpp
127129
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_primitive_array.hpp
128130
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_utils.hpp
129131
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_variable_size_binary_array.hpp
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#pragma once
2+
3+
#include <optional>
4+
#include <unordered_set>
5+
#include <vector>
6+
7+
#include <sparrow/arrow_interface/arrow_array_schema_proxy.hpp>
8+
9+
#include "Message_generated.h"
10+
#include "sparrow_ipc/arrow_interface/arrow_array.hpp"
11+
#include "sparrow_ipc/arrow_interface/arrow_schema.hpp"
12+
#include "sparrow_ipc/deserialize_utils.hpp"
13+
14+
namespace sparrow_ipc::detail
15+
{
16+
/**
17+
* @brief Generic implementation for deserializing non-owning arrays with simple layout.
18+
*
19+
* This function provides the common deserialization logic for array types that have
20+
* a validity buffer and a single data buffer (e.g., primitive_array, interval_array).
21+
*
22+
* @tparam ArrayType The array type template (e.g., sparrow::primitive_array)
23+
* @tparam T The element type
24+
*
25+
* @param record_batch The FlatBuffer RecordBatch containing metadata
26+
* @param body The raw buffer data
27+
* @param name The array column name
28+
* @param metadata Optional metadata pairs
29+
* @param nullable Whether the array is nullable
30+
* @param buffer_index The current buffer index (incremented by this function)
31+
*
32+
* @return The deserialized array of type ArrayType<T>
33+
*/
34+
template <template<typename...> class ArrayType, typename T>
35+
[[nodiscard]] ArrayType<T> deserialize_non_owning_simple_array(
36+
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
37+
std::span<const uint8_t> body,
38+
std::string_view name,
39+
const std::optional<std::vector<sparrow::metadata_pair>>& metadata,
40+
bool nullable,
41+
size_t& buffer_index
42+
)
43+
{
44+
const std::string_view format = data_type_to_format(
45+
sparrow::detail::get_data_type_from_array<ArrayType<T>>::get()
46+
);
47+
48+
// Set up flags based on nullable
49+
std::optional<std::unordered_set<sparrow::ArrowFlag>> flags;
50+
if (nullable)
51+
{
52+
flags = std::unordered_set<sparrow::ArrowFlag>{sparrow::ArrowFlag::NULLABLE};
53+
}
54+
55+
ArrowSchema schema = make_non_owning_arrow_schema(
56+
format,
57+
name.data(),
58+
metadata,
59+
flags,
60+
0,
61+
nullptr,
62+
nullptr
63+
);
64+
65+
const auto compression = record_batch.compression();
66+
std::vector<arrow_array_private_data::optionally_owned_buffer> buffers;
67+
68+
auto validity_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
69+
auto data_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
70+
71+
if (compression)
72+
{
73+
buffers.push_back(utils::get_decompressed_buffer(validity_buffer_span, compression));
74+
buffers.push_back(utils::get_decompressed_buffer(data_buffer_span, compression));
75+
}
76+
else
77+
{
78+
buffers.emplace_back(validity_buffer_span);
79+
buffers.emplace_back(data_buffer_span);
80+
}
81+
82+
// TODO bitmap_ptr is not used anymore... Leave it for now, and remove later if no need confirmed
83+
const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(validity_buffer_span, record_batch.length());
84+
85+
ArrowArray array = make_arrow_array<arrow_array_private_data>(
86+
record_batch.length(),
87+
null_count,
88+
0,
89+
0,
90+
nullptr,
91+
nullptr,
92+
std::move(buffers)
93+
);
94+
95+
sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
96+
return ArrayType<T>{std::move(ap)};
97+
}
98+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#pragma once
2+
3+
#include <sparrow/arrow_interface/arrow_array_schema_proxy.hpp>
4+
#include <sparrow/interval_array.hpp>
5+
6+
#include "Message_generated.h"
7+
#include "sparrow_ipc/deserialize_array_impl.hpp"
8+
9+
namespace sparrow_ipc
10+
{
11+
template <typename T>
12+
[[nodiscard]] sparrow::interval_array<T> deserialize_non_owning_interval_array(
13+
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
14+
std::span<const uint8_t> body,
15+
std::string_view name,
16+
const std::optional<std::vector<sparrow::metadata_pair>>& metadata,
17+
bool nullable,
18+
size_t& buffer_index
19+
)
20+
{
21+
return detail::deserialize_non_owning_simple_array<sparrow::interval_array, T>(
22+
record_batch,
23+
body,
24+
name,
25+
metadata,
26+
nullable,
27+
buffer_index
28+
);
29+
}
30+
}
Lines changed: 7 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,13 @@
11
#pragma once
22

33
#include <optional>
4-
#include <unordered_set>
54
#include <vector>
65

76
#include <sparrow/arrow_interface/arrow_array_schema_proxy.hpp>
87
#include <sparrow/primitive_array.hpp>
98

109
#include "Message_generated.h"
11-
#include "sparrow_ipc/arrow_interface/arrow_array.hpp"
12-
#include "sparrow_ipc/arrow_interface/arrow_schema.hpp"
13-
#include "sparrow_ipc/deserialize_utils.hpp"
10+
#include "sparrow_ipc/deserialize_array_impl.hpp"
1411

1512
namespace sparrow_ipc
1613
{
@@ -24,58 +21,13 @@ namespace sparrow_ipc
2421
size_t& buffer_index
2522
)
2623
{
27-
const std::string_view format = data_type_to_format(
28-
sparrow::detail::get_data_type_from_array<sparrow::primitive_array<T>>::get()
29-
);
30-
31-
// Set up flags based on nullable
32-
std::optional<std::unordered_set<sparrow::ArrowFlag>> flags;
33-
if (nullable)
34-
{
35-
flags = std::unordered_set<sparrow::ArrowFlag>{sparrow::ArrowFlag::NULLABLE};
36-
}
37-
38-
ArrowSchema schema = make_non_owning_arrow_schema(
39-
format,
40-
name.data(),
24+
return detail::deserialize_non_owning_simple_array<sparrow::primitive_array, T>(
25+
record_batch,
26+
body,
27+
name,
4128
metadata,
42-
flags,
43-
0,
44-
nullptr,
45-
nullptr
46-
);
47-
48-
const auto compression = record_batch.compression();
49-
std::vector<arrow_array_private_data::optionally_owned_buffer> buffers;
50-
51-
auto validity_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
52-
auto data_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
53-
54-
if (compression)
55-
{
56-
buffers.push_back(utils::get_decompressed_buffer(validity_buffer_span, compression));
57-
buffers.push_back(utils::get_decompressed_buffer(data_buffer_span, compression));
58-
}
59-
else
60-
{
61-
buffers.push_back(validity_buffer_span);
62-
buffers.push_back(data_buffer_span);
63-
}
64-
65-
// TODO bitmap_ptr is not used anymore... Leave it for now, and remove later if no need confirmed
66-
const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(validity_buffer_span, record_batch.length());
67-
68-
ArrowArray array = make_arrow_array<arrow_array_private_data>(
69-
record_batch.length(),
70-
null_count,
71-
0,
72-
0,
73-
nullptr,
74-
nullptr,
75-
std::move(buffers)
29+
nullable,
30+
buffer_index
7631
);
77-
78-
sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
79-
return sparrow::primitive_array<T>{std::move(ap)};
8032
}
8133
}

0 commit comments

Comments
 (0)