Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
94d5365
feat: impl display for `DataType::List`
irenjj Jan 31, 2025
7ed104b
fix
irenjj Jan 31, 2025
d0ed2db
add nest test
irenjj Feb 1, 2025
aef44fa
fix
irenjj Feb 1, 2025
73e8ce9
fix
irenjj Feb 1, 2025
e381095
fix test
irenjj Feb 11, 2025
550955d
Merge remote-tracking branch 'upstream/main' into display_for_list
irenjj Apr 12, 2025
bb7ada1
fix
irenjj Apr 12, 2025
02384b9
More compact formatting, and distinguish List and LargeList
emilk Sep 7, 2025
fe3eb60
Merge branch 'main' into emilk/display_for_list
emilk Sep 7, 2025
3f0e66b
Break out formatting to own file
emilk Sep 7, 2025
0661f1f
Add a comment with design goals
emilk Sep 7, 2025
b4e9be5
Use `: ` syntax instead of ` = `
emilk Sep 7, 2025
0179d58
Improve `Debug` format of `Field`
emilk Sep 7, 2025
2e9d645
tidy-up
emilk Sep 7, 2025
be144f4
Make sure structs write the nullability of its fields
emilk Sep 7, 2025
f27e349
Update test
emilk Sep 7, 2025
71d9cb6
Update some error reporting
emilk Sep 7, 2025
931c5f9
Add license header
emilk Sep 7, 2025
ef0d080
Merge branch 'main' into emilk/display_for_list
emilk Sep 10, 2025
34d65c8
Rename datatype_format.rs to datatype_display.rs
emilk Sep 11, 2025
68b4eb4
Add comment motivating why we do what we do
emilk Sep 11, 2025
079e7bc
Revert to auto-derived Debug impl
emilk Sep 15, 2025
ae626bb
Merge branch 'main' into emilk/display_for_list
emilk Sep 15, 2025
2992b56
Use Display formatting of Field
emilk Sep 15, 2025
44b5e02
Less Debug-formatting of DataType in error messages
emilk Sep 15, 2025
58c9c2c
Use Display formatting of Field in most error messages
emilk Sep 15, 2025
5cb977a
Less Debug
emilk Sep 15, 2025
941286f
Update test/panic output
emilk Sep 15, 2025
2cbd723
A few more places
emilk Sep 15, 2025
513676a
Another place
emilk Sep 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ impl From<ArrayData> for FixedSizeListArray {
let value_length = match data.data_type() {
DataType::FixedSizeList(_, len) => *len,
data_type => {
panic!("FixedSizeListArray data should contain a FixedSizeList data type, got {data_type:?}")
panic!("FixedSizeListArray data should contain a FixedSizeList data type, got {data_type}")
}
};

Expand Down
6 changes: 3 additions & 3 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -824,20 +824,20 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
dt => panic!("Unexpected dictionary key type {dt:?}"),
dt => panic!("Unexpected dictionary key type {dt}"),
},
DataType::RunEndEncoded(ref run_ends_type, _) => match run_ends_type.data_type() {
DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
dt => panic!("Unexpected data type for run_ends array {dt:?}"),
dt => panic!("Unexpected data type for run_ends array {dt}"),
},
DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
DataType::Decimal32(_, _) => Arc::new(Decimal32Array::from(data)) as ArrayRef,
DataType::Decimal64(_, _) => Arc::new(Decimal64Array::from(data)) as ArrayRef,
DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
dt => panic!("Unexpected data type {dt:?}"),
dt => panic!("Unexpected data type {dt}"),
}
}

Expand Down
6 changes: 3 additions & 3 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1290,7 +1290,7 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let data_type = self.data_type();

write!(f, "PrimitiveArray<{data_type:?}>\n[\n")?;
write!(f, "PrimitiveArray<{data_type}>\n[\n")?;
print_long_array(self, f, |array, index, f| match data_type {
DataType::Date32 | DataType::Date64 => {
let v = self.value(index).to_i64().unwrap();
Expand All @@ -1299,7 +1299,7 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
None => {
write!(
f,
"Cast error: Failed to convert {v} to temporal for {data_type:?}"
"Cast error: Failed to convert {v} to temporal for {data_type}"
)
}
}
Expand All @@ -1311,7 +1311,7 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
None => {
write!(
f,
"Cast error: Failed to convert {v} to temporal for {data_type:?}"
"Cast error: Failed to convert {v} to temporal for {data_type}"
)
}
}
Expand Down
8 changes: 4 additions & 4 deletions arrow-array/src/builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
.with_values_field(fields[1].clone()),
)
}
t => panic!("The field of Map data type {t:?} should have a child Struct field"),
t => panic!("The field of Map data type {t} should have a child Struct field"),
},
DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)),
t @ DataType::Dictionary(key_type, value_type) => {
Expand All @@ -594,7 +594,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
LargeBinaryDictionaryBuilder::with_capacity(capacity, 256, 1024);
Box::new(dict_builder)
}
t => panic!("Dictionary value type {t:?} is not currently supported"),
t => panic!("Dictionary value type {t} is not currently supported"),
}
};
}
Expand All @@ -604,10 +604,10 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
DataType::Int32 => dict_builder!(Int32Type),
DataType::Int64 => dict_builder!(Int64Type),
_ => {
panic!("Data type {t:?} with key type {key_type:?} is not currently supported")
panic!("Data type {t} with key type {key_type} is not currently supported")
}
}
}
t => panic!("Data type {t:?} is not currently supported"),
t => panic!("Data type {t} is not currently supported"),
}
}
6 changes: 3 additions & 3 deletions arrow-array/src/builder/struct_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ use std::sync::Arc;
///
/// // We can't obtain the ListBuilder<StructBuilder> with the expected generic types, because under the hood
/// // the StructBuilder was returned as a Box<dyn ArrayBuilder> and passed as such to the ListBuilder constructor
///
///
/// // This panics in runtime, even though we know that the builder is a ListBuilder<StructBuilder>.
/// // let sb = col_struct_builder
/// // .field_builder::<ListBuilder<StructBuilder>>(0)
Expand Down Expand Up @@ -267,7 +267,7 @@ impl StructBuilder {
let schema = builder.finish();

panic!("{}", format!(
"StructBuilder ({:?}) and field_builder with index {} ({:?}) are of unequal lengths: ({} != {}).",
"StructBuilder ({}) and field_builder with index {} ({}) are of unequal lengths: ({} != {}).",
schema,
idx,
self.fields[idx].data_type(),
Expand Down Expand Up @@ -648,7 +648,7 @@ mod tests {

#[test]
#[should_panic(
expected = "StructBuilder (Schema { fields: [Field { name: \"f1\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"f2\", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)."
expected = "StructBuilder (Field { \"f1\": Int32 }, Field { \"f2\": Boolean }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)."
)]
fn test_struct_array_builder_unequal_field_builders_lengths() {
let mut int_builder = Int32Builder::with_capacity(10);
Expand Down
22 changes: 11 additions & 11 deletions arrow-array/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,11 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
if let Some(primitive) = data_type.primitive_width() {
return match i {
0 => Err(ArrowError::CDataInterface(format!(
"The datatype \"{data_type:?}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
"The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
))),
1 => Ok(primitive * 8),
i => Err(ArrowError::CDataInterface(format!(
"The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
"The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
))),
};
}
Expand All @@ -159,7 +159,7 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
(DataType::Boolean, 1) => 1,
(DataType::Boolean, _) => {
return Err(ArrowError::CDataInterface(format!(
"The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
"The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
)))
}
(DataType::FixedSizeBinary(num_bytes), 1) => *num_bytes as usize * u8::BITS as usize,
Expand All @@ -169,7 +169,7 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
},
(DataType::FixedSizeBinary(_), _) | (DataType::FixedSizeList(_, _), _) => {
return Err(ArrowError::CDataInterface(format!(
"The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
"The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
)))
},
// Variable-size list and map have one i32 buffer.
Expand All @@ -179,12 +179,12 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
(DataType::Utf8, 2) | (DataType::Binary, 2) => u8::BITS as _,
(DataType::List(_), _) | (DataType::Map(_, _), _) => {
return Err(ArrowError::CDataInterface(format!(
"The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
"The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
)))
}
(DataType::Utf8, _) | (DataType::Binary, _) => {
return Err(ArrowError::CDataInterface(format!(
"The datatype \"{data_type:?}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
"The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
)))
}
// Variable-sized binaries: have two buffers.
Expand All @@ -193,7 +193,7 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
(DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) | (DataType::LargeList(_), 2)=> u8::BITS as _,
(DataType::LargeUtf8, _) | (DataType::LargeBinary, _) | (DataType::LargeList(_), _)=> {
return Err(ArrowError::CDataInterface(format!(
"The datatype \"{data_type:?}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
"The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
)))
}
// Variable-sized views: have 3 or more buffers.
Expand All @@ -209,24 +209,24 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
(DataType::Union(_, UnionMode::Dense), 1) => i32::BITS as _,
(DataType::Union(_, UnionMode::Sparse), _) => {
return Err(ArrowError::CDataInterface(format!(
"The datatype \"{data_type:?}\" expects 1 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
"The datatype \"{data_type}\" expects 1 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
)))
}
(DataType::Union(_, UnionMode::Dense), _) => {
return Err(ArrowError::CDataInterface(format!(
"The datatype \"{data_type:?}\" expects 2 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
"The datatype \"{data_type}\" expects 2 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
)))
}
(_, 0) => {
// We don't call `bit_width` to compute the buffer length of the null buffer. Any type that doesn't have a null buffer,
// such as UnionArray, should be handled above.
return Err(ArrowError::CDataInterface(format!(
"The datatype \"{data_type:?}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
"The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
)))
}
_ => {
return Err(ArrowError::CDataInterface(format!(
"The datatype \"{data_type:?}\" is still not supported in Rust implementation"
"The datatype \"{data_type}\" is still not supported in Rust implementation"
)))
}
})
Expand Down
4 changes: 2 additions & 2 deletions arrow-array/src/record_batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ impl RecordBatch {

if let Some((i, (col_type, field_type))) = not_match {
return Err(ArrowError::InvalidArgumentError(format!(
"column types must match schema types, expected {field_type:?} but found {col_type:?} at column index {i}")));
"column types must match schema types, expected {field_type} but found {col_type} at column index {i}")));
}

Ok(RecordBatch {
Expand Down Expand Up @@ -422,7 +422,7 @@ impl RecordBatch {
/// // Insert a key-value pair into the metadata
/// batch.schema_metadata_mut().insert("key".into(), "value".into());
/// assert_eq!(batch.schema().metadata().get("key"), Some(&String::from("value")));
/// ```
/// ```
pub fn schema_metadata_mut(&mut self) -> &mut std::collections::HashMap<String, String> {
let schema = Arc::make_mut(&mut self.schema);
&mut schema.metadata
Expand Down
4 changes: 2 additions & 2 deletions arrow-cast/src/cast/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
_ => {
return Err(ArrowError::CastError(format!(
"Unsupported type {to_index_type:?} for dictionary index"
"Unsupported type {to_index_type} for dictionary index"
)));
}
};
Expand Down Expand Up @@ -313,7 +313,7 @@ pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
pack_byte_to_fixed_size_dictionary::<K>(array, cast_options, byte_size)
}
_ => Err(ArrowError::CastError(format!(
"Unsupported output type for dictionary packing: {dict_value_type:?}"
"Unsupported output type for dictionary packing: {dict_value_type}"
))),
}
}
Expand Down
Loading
Loading