Skip to content

Commit 8394659

Browse files
authored
[Variant] Fix NULL handling for shredded object fields (#8395)
# Which issue does this PR close? - Fast-follow for #8366 - Related to #8392 # Rationale for this change Somehow, #8392 exposes a latent bug in #8366, which has improper NULL handling for shredded object fields. The shredding PR originally attempted to handle this case, but somehow the test did not trigger the bug and so the (admittedly incomplete) code was removed. See #8366 (comment). To be honest, I have no idea how the original ever worked correctly, nor why the new PR is able to expose the problem. # What changes are included in this PR? When used as a top-level builder, `VariantToShreddedVariantRowBuilder::append_null` must append NULL to its own `NullBufferBuilder`; but when used as a shredded object field builder, it must append non-NULL. Plumb a new `top_level` parameter through the various functions and into the two sub-builders it relies on, so they can implement the correct semantics. # Are these changes tested? In theory, yes (I don't know how the object shredding test ever passed). And it fixes the breakage in #8392. # Are there any user-facing changes? No
1 parent 06c638f commit 8394659

File tree

1 file changed

+32
-8
lines changed

1 file changed

+32
-8
lines changed

parquet-variant-compute/src/shred_variant.rs

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,12 @@ pub fn shred_variant(array: &VariantArray, as_type: &DataType) -> Result<Variant
7676
};
7777

7878
let cast_options = CastOptions::default();
79-
let mut builder =
80-
make_variant_to_shredded_variant_arrow_row_builder(as_type, &cast_options, array.len())?;
79+
let mut builder = make_variant_to_shredded_variant_arrow_row_builder(
80+
as_type,
81+
&cast_options,
82+
array.len(),
83+
true,
84+
)?;
8185
for i in 0..array.len() {
8286
if array.is_null(i) {
8387
builder.append_null()?;
@@ -98,11 +102,16 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>(
98102
data_type: &'a DataType,
99103
cast_options: &'a CastOptions,
100104
capacity: usize,
105+
top_level: bool,
101106
) -> Result<VariantToShreddedVariantRowBuilder<'a>> {
102107
let builder = match data_type {
103108
DataType::Struct(fields) => {
104-
let typed_value_builder =
105-
VariantToShreddedObjectVariantRowBuilder::try_new(fields, cast_options, capacity)?;
109+
let typed_value_builder = VariantToShreddedObjectVariantRowBuilder::try_new(
110+
fields,
111+
cast_options,
112+
capacity,
113+
top_level,
114+
)?;
106115
VariantToShreddedVariantRowBuilder::Object(typed_value_builder)
107116
}
108117
DataType::List(_)
@@ -118,7 +127,7 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>(
118127
let builder =
119128
make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
120129
let typed_value_builder =
121-
VariantToShreddedPrimitiveVariantRowBuilder::new(builder, capacity);
130+
VariantToShreddedPrimitiveVariantRowBuilder::new(builder, capacity, top_level);
122131
VariantToShreddedVariantRowBuilder::Primitive(typed_value_builder)
123132
}
124133
};
@@ -160,21 +169,26 @@ pub(crate) struct VariantToShreddedPrimitiveVariantRowBuilder<'a> {
160169
value_builder: VariantValueArrayBuilder,
161170
typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>,
162171
nulls: NullBufferBuilder,
172+
top_level: bool,
163173
}
164174

165175
impl<'a> VariantToShreddedPrimitiveVariantRowBuilder<'a> {
166176
pub(crate) fn new(
167177
typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>,
168178
capacity: usize,
179+
top_level: bool,
169180
) -> Self {
170181
Self {
171182
value_builder: VariantValueArrayBuilder::new(capacity),
172183
typed_value_builder,
173184
nulls: NullBufferBuilder::new(capacity),
185+
top_level,
174186
}
175187
}
176188
fn append_null(&mut self) -> Result<()> {
177-
self.nulls.append_null();
189+
// Only the top-level struct that represents the variant can be nullable; object fields and
190+
// array elements are non-nullable.
191+
self.nulls.append(!self.top_level);
178192
self.value_builder.append_null();
179193
self.typed_value_builder.append_null()
180194
}
@@ -201,15 +215,22 @@ pub(crate) struct VariantToShreddedObjectVariantRowBuilder<'a> {
201215
typed_value_builders: IndexMap<&'a str, VariantToShreddedVariantRowBuilder<'a>>,
202216
typed_value_nulls: NullBufferBuilder,
203217
nulls: NullBufferBuilder,
218+
top_level: bool,
204219
}
205220

206221
impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
207-
fn try_new(fields: &'a Fields, cast_options: &'a CastOptions, capacity: usize) -> Result<Self> {
222+
fn try_new(
223+
fields: &'a Fields,
224+
cast_options: &'a CastOptions,
225+
capacity: usize,
226+
top_level: bool,
227+
) -> Result<Self> {
208228
let typed_value_builders = fields.iter().map(|field| {
209229
let builder = make_variant_to_shredded_variant_arrow_row_builder(
210230
field.data_type(),
211231
cast_options,
212232
capacity,
233+
false,
213234
)?;
214235
Ok((field.name().as_str(), builder))
215236
});
@@ -218,11 +239,14 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
218239
typed_value_builders: typed_value_builders.collect::<Result<_>>()?,
219240
typed_value_nulls: NullBufferBuilder::new(capacity),
220241
nulls: NullBufferBuilder::new(capacity),
242+
top_level,
221243
})
222244
}
223245

224246
fn append_null(&mut self) -> Result<()> {
225-
self.nulls.append_null();
247+
// Only the top-level struct that represents the variant can be nullable; object fields and
248+
// array elements are non-nullable.
249+
self.nulls.append(!self.top_level);
226250
self.value_builder.append_null();
227251
self.typed_value_nulls.append_null();
228252
for (_, typed_value_builder) in &mut self.typed_value_builders {

0 commit comments

Comments
 (0)