Skip to content

Commit a8ad90d

Browse files
authored
[Variant]: Implement DataType::ListView/LargeListView support for cast_to_variant kernel (#8241)
# Which issue does this PR close?

- Closes #8236.

# Rationale for this change

# What changes are included in this PR?

Implements `ListView` / `LargeListView` support in the `cast_to_variant` kernel.

# Are these changes tested?

Yes.

# Are there any user-facing changes?

Yes: `cast_to_variant` now supports the `ListView` and `LargeListView` data types.
1 parent 9eabd32 commit a8ad90d

File tree

2 files changed

+224
-27
lines changed

2 files changed

+224
-27
lines changed

parquet-variant-compute/src/arrow_to_variant.rs

Lines changed: 69 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,10 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use std::collections::HashMap;
19-
2018
use crate::type_conversion::{decimal_to_variant_decimal, CastOptions};
2119
use arrow::array::{
22-
Array, AsArray, GenericBinaryArray, GenericStringArray, OffsetSizeTrait, PrimitiveArray,
20+
Array, AsArray, GenericBinaryArray, GenericListArray, GenericListViewArray, GenericStringArray,
21+
OffsetSizeTrait, PrimitiveArray,
2322
};
2423
use arrow::compute::kernels::cast;
2524
use arrow::datatypes::{
@@ -36,6 +35,8 @@ use parquet_variant::{
3635
ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal16, VariantDecimal4,
3736
VariantDecimal8,
3837
};
38+
use std::collections::HashMap;
39+
use std::ops::Range;
3940

4041
// ============================================================================
4142
// Row-oriented builders for efficient Arrow-to-Variant conversion
@@ -77,8 +78,10 @@ pub(crate) enum ArrowToVariantRowBuilder<'a> {
7778
Utf8(StringArrowToVariantBuilder<'a, i32>),
7879
LargeUtf8(StringArrowToVariantBuilder<'a, i64>),
7980
Utf8View(StringViewArrowToVariantBuilder<'a>),
80-
List(ListArrowToVariantBuilder<'a, i32>),
81-
LargeList(ListArrowToVariantBuilder<'a, i64>),
81+
List(ListArrowToVariantBuilder<'a, GenericListArray<i32>>),
82+
LargeList(ListArrowToVariantBuilder<'a, GenericListArray<i64>>),
83+
ListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i32>>),
84+
LargeListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i64>>),
8285
Struct(StructArrowToVariantBuilder<'a>),
8386
Map(MapArrowToVariantBuilder<'a>),
8487
Union(UnionArrowToVariantBuilder<'a>),
@@ -133,6 +136,8 @@ impl<'a> ArrowToVariantRowBuilder<'a> {
133136
Utf8View(b) => b.append_row(builder, index),
134137
List(b) => b.append_row(builder, index),
135138
LargeList(b) => b.append_row(builder, index),
139+
ListView(b) => b.append_row(builder, index),
140+
LargeListView(b) => b.append_row(builder, index),
136141
Struct(b) => b.append_row(builder, index),
137142
Map(b) => b.append_row(builder, index),
138143
Union(b) => b.append_row(builder, index),
@@ -238,8 +243,18 @@ pub(crate) fn make_arrow_to_variant_row_builder<'a>(
238243
DataType::Utf8 => Utf8(StringArrowToVariantBuilder::new(array)),
239244
DataType::LargeUtf8 => LargeUtf8(StringArrowToVariantBuilder::new(array)),
240245
DataType::Utf8View => Utf8View(StringViewArrowToVariantBuilder::new(array)),
241-
DataType::List(_) => List(ListArrowToVariantBuilder::new(array, options)?),
242-
DataType::LargeList(_) => LargeList(ListArrowToVariantBuilder::new(array, options)?),
246+
DataType::List(_) => List(ListArrowToVariantBuilder::new(array.as_list(), options)?),
247+
DataType::LargeList(_) => {
248+
LargeList(ListArrowToVariantBuilder::new(array.as_list(), options)?)
249+
}
250+
DataType::ListView(_) => ListView(ListArrowToVariantBuilder::new(
251+
array.as_list_view(),
252+
options,
253+
)?),
254+
DataType::LargeListView(_) => LargeListView(ListArrowToVariantBuilder::new(
255+
array.as_list_view(),
256+
options,
257+
)?),
243258
DataType::Struct(_) => Struct(StructArrowToVariantBuilder::new(
244259
array.as_struct(),
245260
options,
@@ -425,7 +440,7 @@ define_row_builder!(
425440
options: &'a CastOptions,
426441
has_time_zone: bool,
427442
},
428-
|array| -> arrow::array::PrimitiveArray<T> { array.as_primitive() },
443+
|array| -> PrimitiveArray<T> { array.as_primitive() },
429444
|value| -> Option<_> {
430445
// Convert using Arrow's temporal conversion functions
431446
as_datetime::<T>(value).map(|naive_datetime| {
@@ -508,21 +523,20 @@ impl NullArrowToVariantBuilder {
508523
}
509524
}
510525

511-
/// Generic list builder for List and LargeList types
512-
pub(crate) struct ListArrowToVariantBuilder<'a, O: OffsetSizeTrait> {
513-
list_array: &'a arrow::array::GenericListArray<O>,
526+
/// Generic list builder for List, LargeList, ListView, and LargeListView types
527+
pub(crate) struct ListArrowToVariantBuilder<'a, L: ListLikeArray> {
528+
list_array: &'a L,
514529
values_builder: Box<ArrowToVariantRowBuilder<'a>>,
515530
}
516531

517-
impl<'a, O: OffsetSizeTrait> ListArrowToVariantBuilder<'a, O> {
518-
pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
519-
let list_array = array.as_list();
520-
let values = list_array.values();
532+
impl<'a, L: ListLikeArray> ListArrowToVariantBuilder<'a, L> {
533+
pub(crate) fn new(array: &'a L, options: &'a CastOptions) -> Result<Self, ArrowError> {
534+
let values = array.values();
521535
let values_builder =
522-
make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
536+
make_arrow_to_variant_row_builder(values.data_type(), values, options)?;
523537

524538
Ok(Self {
525-
list_array,
539+
list_array: array,
526540
values_builder: Box::new(values_builder),
527541
})
528542
}
@@ -537,12 +551,10 @@ impl<'a, O: OffsetSizeTrait> ListArrowToVariantBuilder<'a, O> {
537551
return Ok(());
538552
}
539553

540-
let offsets = self.list_array.offsets();
541-
let start = offsets[index].as_usize();
542-
let end = offsets[index + 1].as_usize();
554+
let range = self.list_array.element_range(index);
543555

544556
let mut list_builder = builder.try_new_list()?;
545-
for value_index in start..end {
557+
for value_index in range {
546558
self.values_builder
547559
.append_row(&mut list_builder, value_index)?;
548560
}
@@ -551,6 +563,42 @@ impl<'a, O: OffsetSizeTrait> ListArrowToVariantBuilder<'a, O> {
551563
}
552564
}
553565

566+
/// Trait for list-like arrays that can provide element ranges
567+
pub(crate) trait ListLikeArray: Array {
568+
/// Get the values array
569+
fn values(&self) -> &dyn Array;
570+
571+
/// Get the start and end indices for a list element
572+
fn element_range(&self, index: usize) -> Range<usize>;
573+
}
574+
575+
impl<O: OffsetSizeTrait> ListLikeArray for GenericListArray<O> {
576+
fn values(&self) -> &dyn Array {
577+
self.values()
578+
}
579+
580+
fn element_range(&self, index: usize) -> Range<usize> {
581+
let offsets = self.offsets();
582+
let start = offsets[index].as_usize();
583+
let end = offsets[index + 1].as_usize();
584+
start..end
585+
}
586+
}
587+
588+
impl<O: OffsetSizeTrait> ListLikeArray for GenericListViewArray<O> {
589+
fn values(&self) -> &dyn Array {
590+
self.values()
591+
}
592+
593+
fn element_range(&self, index: usize) -> Range<usize> {
594+
let offsets = self.value_offsets();
595+
let sizes = self.value_sizes();
596+
let offset = offsets[index].as_usize();
597+
let size = sizes[index].as_usize();
598+
offset..(offset + size)
599+
}
600+
}
601+
554602
/// Struct builder for StructArray
555603
pub(crate) struct StructArrowToVariantBuilder<'a> {
556604
struct_array: &'a arrow::array::StructArray,

parquet-variant-compute/src/cast_to_variant.rs

Lines changed: 155 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,11 @@ mod tests {
9494
FixedSizeBinaryBuilder, Float16Array, Float32Array, Float64Array, GenericByteBuilder,
9595
GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array,
9696
IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeListArray,
97-
LargeStringArray, ListArray, MapArray, NullArray, StringArray, StringRunBuilder,
98-
StringViewArray, StructArray, Time32MillisecondArray, Time32SecondArray,
99-
Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray,
100-
TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array,
101-
UInt32Array, UInt64Array, UInt8Array, UnionArray,
97+
LargeListViewBuilder, LargeStringArray, ListArray, ListViewBuilder, MapArray, NullArray,
98+
StringArray, StringRunBuilder, StringViewArray, StructArray, Time32MillisecondArray,
99+
Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
100+
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
101+
TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, UnionArray,
102102
};
103103
use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
104104
use arrow::datatypes::{
@@ -112,7 +112,8 @@ mod tests {
112112
use chrono::{DateTime, NaiveDate, NaiveTime};
113113
use half::f16;
114114
use parquet_variant::{
115-
Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8,
115+
Variant, VariantBuilder, VariantBuilderExt, VariantDecimal16, VariantDecimal4,
116+
VariantDecimal8,
116117
};
117118
use std::{sync::Arc, vec};
118119

@@ -1258,6 +1259,154 @@ mod tests {
12581259
);
12591260
}
12601261

1262+
#[test]
fn test_cast_to_variant_list_view() {
    // Input rows: [0, null, 2], [3, 4], null, [null, null]
    let mut rows = ListViewBuilder::new(Int32Array::builder(0));
    rows.append_value(&Int32Array::from(vec![Some(0), None, Some(2)]));
    rows.append_value(&Int32Array::from(vec![Some(3), Some(4)]));
    rows.append_null();
    rows.append_value(&Int32Array::from(vec![None, None]));
    let input = rows.finish();

    // Encodes optional i32 items as a standalone variant list and hands back
    // the (metadata, value) buffers that the expected `Variant`s borrow from.
    let encode = |items: &[Option<i32>]| {
        let mut variant_builder = VariantBuilder::new();
        let mut list = variant_builder.new_list();
        for item in items {
            match *item {
                Some(v) => {
                    list.append_value(v);
                }
                None => {
                    list.append_null();
                }
            }
        }
        list.finish();
        variant_builder.finish()
    };

    let (metadata0, value0) = encode(&[Some(0), None, Some(2)]);
    let (metadata1, value1) = encode(&[Some(3), Some(4)]);
    let (metadata3, value3) = encode(&[None, None]);

    // The null input row (index 2) must come out as a null variant.
    let expected = vec![
        Some(Variant::new(&metadata0, &value0)),
        Some(Variant::new(&metadata1, &value1)),
        None,
        Some(Variant::new(&metadata3, &value3)),
    ];

    run_test(Arc::new(input), expected);
}
1309+
1310+
#[test]
fn test_cast_to_variant_sliced_list_view() {
    // Unsliced rows: [0, 1, 2], [3, null], null
    let mut rows = ListViewBuilder::new(Int32Array::builder(0));
    rows.append_value(&Int32Array::from(vec![Some(0), Some(1), Some(2)]));
    rows.append_value(&Int32Array::from(vec![Some(3), None]));
    rows.append_null();
    let unsliced = rows.finish();

    // slice(1, 2) keeps the second and third rows: [3, null] and the null row.
    let sliced = unsliced.slice(1, 2);

    // Expected encoding of the one non-null row that survives the slice.
    let (metadata, value) = {
        let mut variant_builder = VariantBuilder::new();
        let mut list = variant_builder.new_list();
        list.append_value(3i32);
        list.append_null();
        list.finish();
        variant_builder.finish()
    };
    let expected = vec![Some(Variant::new(&metadata, &value)), None];

    run_test(Arc::new(sliced), expected);
}
1335+
1336+
#[test]
fn test_cast_to_variant_large_list_view() {
    // Input rows: [0, null, 2], [3, 4], null, [null, null]
    let mut rows = LargeListViewBuilder::new(Int64Array::builder(0));
    rows.append_value(&Int64Array::from(vec![Some(0), None, Some(2)]));
    rows.append_value(&Int64Array::from(vec![Some(3), Some(4)]));
    rows.append_null();
    rows.append_value(&Int64Array::from(vec![None, None]));
    let input = rows.finish();

    // Encodes optional i64 items as a standalone variant list and hands back
    // the (metadata, value) buffers that the expected `Variant`s borrow from.
    let encode = |items: &[Option<i64>]| {
        let mut variant_builder = VariantBuilder::new();
        let mut list = variant_builder.new_list();
        for item in items {
            match *item {
                Some(v) => {
                    list.append_value(v);
                }
                None => {
                    list.append_null();
                }
            }
        }
        list.finish();
        variant_builder.finish()
    };

    let (metadata0, value0) = encode(&[Some(0), None, Some(2)]);
    let (metadata1, value1) = encode(&[Some(3), Some(4)]);
    let (metadata3, value3) = encode(&[None, None]);

    // The null input row (index 2) must come out as a null variant.
    let expected = vec![
        Some(Variant::new(&metadata0, &value0)),
        Some(Variant::new(&metadata1, &value1)),
        None,
        Some(Variant::new(&metadata3, &value3)),
    ];

    run_test(Arc::new(input), expected);
}
1383+
1384+
#[test]
fn test_cast_to_variant_sliced_large_list_view() {
    // Unsliced rows: [0, 1, 2], [3, null], null
    let mut rows = LargeListViewBuilder::new(Int64Array::builder(0));
    rows.append_value(&Int64Array::from(vec![Some(0), Some(1), Some(2)]));
    rows.append_value(&Int64Array::from(vec![Some(3), None]));
    rows.append_null();
    let unsliced = rows.finish();

    // slice(1, 2) keeps the second and third rows: [3, null] and the null row.
    let sliced = unsliced.slice(1, 2);

    // Expected encoding of the one non-null row that survives the slice.
    let (metadata, value) = {
        let mut variant_builder = VariantBuilder::new();
        let mut list = variant_builder.new_list();
        list.append_value(3i64);
        list.append_null();
        list.finish();
        variant_builder.finish()
    };
    let expected = vec![Some(Variant::new(&metadata, &value)), None];

    run_test(Arc::new(sliced), expected);
}
1409+
12611410
#[test]
12621411
fn test_cast_to_variant_struct() {
12631412
// Test a simple struct with two fields: id (int64) and age (int32)

0 commit comments

Comments
 (0)